[AArch64] Make vabs<q>_f<32, 64> a tree/gimple intrinsic.
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob6f5ca8ec6d64716232d091a226d830e88e1ce206
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 typedef __builtin_aarch64_simd_qi int8x8_t
33 __attribute__ ((__vector_size__ (8)));
34 typedef __builtin_aarch64_simd_hi int16x4_t
35 __attribute__ ((__vector_size__ (8)));
36 typedef __builtin_aarch64_simd_si int32x2_t
37 __attribute__ ((__vector_size__ (8)));
38 typedef int64_t int64x1_t;
39 typedef int32_t int32x1_t;
40 typedef int16_t int16x1_t;
41 typedef int8_t int8x1_t;
42 typedef double float64x1_t;
43 typedef __builtin_aarch64_simd_sf float32x2_t
44 __attribute__ ((__vector_size__ (8)));
45 typedef __builtin_aarch64_simd_poly8 poly8x8_t
46 __attribute__ ((__vector_size__ (8)));
47 typedef __builtin_aarch64_simd_poly16 poly16x4_t
48 __attribute__ ((__vector_size__ (8)));
49 typedef __builtin_aarch64_simd_uqi uint8x8_t
50 __attribute__ ((__vector_size__ (8)));
51 typedef __builtin_aarch64_simd_uhi uint16x4_t
52 __attribute__ ((__vector_size__ (8)));
53 typedef __builtin_aarch64_simd_usi uint32x2_t
54 __attribute__ ((__vector_size__ (8)));
55 typedef uint64_t uint64x1_t;
56 typedef uint32_t uint32x1_t;
57 typedef uint16_t uint16x1_t;
58 typedef uint8_t uint8x1_t;
59 typedef __builtin_aarch64_simd_qi int8x16_t
60 __attribute__ ((__vector_size__ (16)));
61 typedef __builtin_aarch64_simd_hi int16x8_t
62 __attribute__ ((__vector_size__ (16)));
63 typedef __builtin_aarch64_simd_si int32x4_t
64 __attribute__ ((__vector_size__ (16)));
65 typedef __builtin_aarch64_simd_di int64x2_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_sf float32x4_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_df float64x2_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_poly8 poly8x16_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_poly16 poly16x8_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_uqi uint8x16_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_uhi uint16x8_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_usi uint32x4_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_udi uint64x2_t
82 __attribute__ ((__vector_size__ (16)));
84 typedef float float32_t;
85 typedef double float64_t;
86 typedef __builtin_aarch64_simd_poly8 poly8_t;
87 typedef __builtin_aarch64_simd_poly16 poly16_t;
89 typedef struct int8x8x2_t
91 int8x8_t val[2];
92 } int8x8x2_t;
94 typedef struct int8x16x2_t
96 int8x16_t val[2];
97 } int8x16x2_t;
99 typedef struct int16x4x2_t
101 int16x4_t val[2];
102 } int16x4x2_t;
104 typedef struct int16x8x2_t
106 int16x8_t val[2];
107 } int16x8x2_t;
109 typedef struct int32x2x2_t
111 int32x2_t val[2];
112 } int32x2x2_t;
114 typedef struct int32x4x2_t
116 int32x4_t val[2];
117 } int32x4x2_t;
119 typedef struct int64x1x2_t
121 int64x1_t val[2];
122 } int64x1x2_t;
124 typedef struct int64x2x2_t
126 int64x2_t val[2];
127 } int64x2x2_t;
129 typedef struct uint8x8x2_t
131 uint8x8_t val[2];
132 } uint8x8x2_t;
134 typedef struct uint8x16x2_t
136 uint8x16_t val[2];
137 } uint8x16x2_t;
139 typedef struct uint16x4x2_t
141 uint16x4_t val[2];
142 } uint16x4x2_t;
144 typedef struct uint16x8x2_t
146 uint16x8_t val[2];
147 } uint16x8x2_t;
149 typedef struct uint32x2x2_t
151 uint32x2_t val[2];
152 } uint32x2x2_t;
154 typedef struct uint32x4x2_t
156 uint32x4_t val[2];
157 } uint32x4x2_t;
159 typedef struct uint64x1x2_t
161 uint64x1_t val[2];
162 } uint64x1x2_t;
164 typedef struct uint64x2x2_t
166 uint64x2_t val[2];
167 } uint64x2x2_t;
169 typedef struct float32x2x2_t
171 float32x2_t val[2];
172 } float32x2x2_t;
174 typedef struct float32x4x2_t
176 float32x4_t val[2];
177 } float32x4x2_t;
179 typedef struct float64x2x2_t
181 float64x2_t val[2];
182 } float64x2x2_t;
184 typedef struct float64x1x2_t
186 float64x1_t val[2];
187 } float64x1x2_t;
189 typedef struct poly8x8x2_t
191 poly8x8_t val[2];
192 } poly8x8x2_t;
194 typedef struct poly8x16x2_t
196 poly8x16_t val[2];
197 } poly8x16x2_t;
199 typedef struct poly16x4x2_t
201 poly16x4_t val[2];
202 } poly16x4x2_t;
204 typedef struct poly16x8x2_t
206 poly16x8_t val[2];
207 } poly16x8x2_t;
209 typedef struct int8x8x3_t
211 int8x8_t val[3];
212 } int8x8x3_t;
214 typedef struct int8x16x3_t
216 int8x16_t val[3];
217 } int8x16x3_t;
219 typedef struct int16x4x3_t
221 int16x4_t val[3];
222 } int16x4x3_t;
224 typedef struct int16x8x3_t
226 int16x8_t val[3];
227 } int16x8x3_t;
229 typedef struct int32x2x3_t
231 int32x2_t val[3];
232 } int32x2x3_t;
234 typedef struct int32x4x3_t
236 int32x4_t val[3];
237 } int32x4x3_t;
239 typedef struct int64x1x3_t
241 int64x1_t val[3];
242 } int64x1x3_t;
244 typedef struct int64x2x3_t
246 int64x2_t val[3];
247 } int64x2x3_t;
249 typedef struct uint8x8x3_t
251 uint8x8_t val[3];
252 } uint8x8x3_t;
254 typedef struct uint8x16x3_t
256 uint8x16_t val[3];
257 } uint8x16x3_t;
259 typedef struct uint16x4x3_t
261 uint16x4_t val[3];
262 } uint16x4x3_t;
264 typedef struct uint16x8x3_t
266 uint16x8_t val[3];
267 } uint16x8x3_t;
269 typedef struct uint32x2x3_t
271 uint32x2_t val[3];
272 } uint32x2x3_t;
274 typedef struct uint32x4x3_t
276 uint32x4_t val[3];
277 } uint32x4x3_t;
279 typedef struct uint64x1x3_t
281 uint64x1_t val[3];
282 } uint64x1x3_t;
284 typedef struct uint64x2x3_t
286 uint64x2_t val[3];
287 } uint64x2x3_t;
289 typedef struct float32x2x3_t
291 float32x2_t val[3];
292 } float32x2x3_t;
294 typedef struct float32x4x3_t
296 float32x4_t val[3];
297 } float32x4x3_t;
299 typedef struct float64x2x3_t
301 float64x2_t val[3];
302 } float64x2x3_t;
304 typedef struct float64x1x3_t
306 float64x1_t val[3];
307 } float64x1x3_t;
309 typedef struct poly8x8x3_t
311 poly8x8_t val[3];
312 } poly8x8x3_t;
314 typedef struct poly8x16x3_t
316 poly8x16_t val[3];
317 } poly8x16x3_t;
319 typedef struct poly16x4x3_t
321 poly16x4_t val[3];
322 } poly16x4x3_t;
324 typedef struct poly16x8x3_t
326 poly16x8_t val[3];
327 } poly16x8x3_t;
329 typedef struct int8x8x4_t
331 int8x8_t val[4];
332 } int8x8x4_t;
334 typedef struct int8x16x4_t
336 int8x16_t val[4];
337 } int8x16x4_t;
339 typedef struct int16x4x4_t
341 int16x4_t val[4];
342 } int16x4x4_t;
344 typedef struct int16x8x4_t
346 int16x8_t val[4];
347 } int16x8x4_t;
349 typedef struct int32x2x4_t
351 int32x2_t val[4];
352 } int32x2x4_t;
354 typedef struct int32x4x4_t
356 int32x4_t val[4];
357 } int32x4x4_t;
359 typedef struct int64x1x4_t
361 int64x1_t val[4];
362 } int64x1x4_t;
364 typedef struct int64x2x4_t
366 int64x2_t val[4];
367 } int64x2x4_t;
369 typedef struct uint8x8x4_t
371 uint8x8_t val[4];
372 } uint8x8x4_t;
374 typedef struct uint8x16x4_t
376 uint8x16_t val[4];
377 } uint8x16x4_t;
379 typedef struct uint16x4x4_t
381 uint16x4_t val[4];
382 } uint16x4x4_t;
384 typedef struct uint16x8x4_t
386 uint16x8_t val[4];
387 } uint16x8x4_t;
389 typedef struct uint32x2x4_t
391 uint32x2_t val[4];
392 } uint32x2x4_t;
394 typedef struct uint32x4x4_t
396 uint32x4_t val[4];
397 } uint32x4x4_t;
399 typedef struct uint64x1x4_t
401 uint64x1_t val[4];
402 } uint64x1x4_t;
404 typedef struct uint64x2x4_t
406 uint64x2_t val[4];
407 } uint64x2x4_t;
409 typedef struct float32x2x4_t
411 float32x2_t val[4];
412 } float32x2x4_t;
414 typedef struct float32x4x4_t
416 float32x4_t val[4];
417 } float32x4x4_t;
419 typedef struct float64x2x4_t
421 float64x2_t val[4];
422 } float64x2x4_t;
424 typedef struct float64x1x4_t
426 float64x1_t val[4];
427 } float64x1x4_t;
429 typedef struct poly8x8x4_t
431 poly8x8_t val[4];
432 } poly8x8x4_t;
434 typedef struct poly8x16x4_t
436 poly8x16_t val[4];
437 } poly8x16x4_t;
439 typedef struct poly16x4x4_t
441 poly16x4_t val[4];
442 } poly16x4x4_t;
444 typedef struct poly16x8x4_t
446 poly16x8_t val[4];
447 } poly16x8x4_t;
450 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
451 vadd_s8 (int8x8_t __a, int8x8_t __b)
453 return __a + __b;
456 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
457 vadd_s16 (int16x4_t __a, int16x4_t __b)
459 return __a + __b;
462 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
463 vadd_s32 (int32x2_t __a, int32x2_t __b)
465 return __a + __b;
468 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
469 vadd_f32 (float32x2_t __a, float32x2_t __b)
471 return __a + __b;
474 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
475 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
477 return __a + __b;
480 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
481 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
483 return __a + __b;
486 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
487 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
489 return __a + __b;
492 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
493 vadd_s64 (int64x1_t __a, int64x1_t __b)
495 return __a + __b;
498 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
499 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
501 return __a + __b;
504 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
505 vaddq_s8 (int8x16_t __a, int8x16_t __b)
507 return __a + __b;
510 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
511 vaddq_s16 (int16x8_t __a, int16x8_t __b)
513 return __a + __b;
516 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
517 vaddq_s32 (int32x4_t __a, int32x4_t __b)
519 return __a + __b;
522 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
523 vaddq_s64 (int64x2_t __a, int64x2_t __b)
525 return __a + __b;
528 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
529 vaddq_f32 (float32x4_t __a, float32x4_t __b)
531 return __a + __b;
534 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
535 vaddq_f64 (float64x2_t __a, float64x2_t __b)
537 return __a + __b;
540 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
541 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
543 return __a + __b;
546 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
547 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
549 return __a + __b;
552 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
553 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
555 return __a + __b;
558 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
559 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
561 return __a + __b;
564 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
565 vaddl_s8 (int8x8_t __a, int8x8_t __b)
567 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
570 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
571 vaddl_s16 (int16x4_t __a, int16x4_t __b)
573 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
576 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
577 vaddl_s32 (int32x2_t __a, int32x2_t __b)
579 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
582 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
583 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
585 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
586 (int8x8_t) __b);
589 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
590 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
592 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
593 (int16x4_t) __b);
596 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
597 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
599 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
600 (int32x2_t) __b);
603 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
604 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
606 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
609 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
610 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
612 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
615 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
616 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
618 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
621 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
622 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
624 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
625 (int8x16_t) __b);
628 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
629 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
631 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
632 (int16x8_t) __b);
635 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
636 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
638 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
639 (int32x4_t) __b);
642 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
643 vaddw_s8 (int16x8_t __a, int8x8_t __b)
645 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
648 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
649 vaddw_s16 (int32x4_t __a, int16x4_t __b)
651 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
654 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
655 vaddw_s32 (int64x2_t __a, int32x2_t __b)
657 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
660 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
661 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
663 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
664 (int8x8_t) __b);
667 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
668 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
670 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
671 (int16x4_t) __b);
674 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
675 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
677 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
678 (int32x2_t) __b);
681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
682 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
684 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
687 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
688 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
690 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
693 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
694 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
696 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
699 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
700 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
702 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
703 (int8x16_t) __b);
706 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
707 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
709 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
710 (int16x8_t) __b);
713 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
714 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
716 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
717 (int32x4_t) __b);
720 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
721 vhadd_s8 (int8x8_t __a, int8x8_t __b)
723 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
726 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
727 vhadd_s16 (int16x4_t __a, int16x4_t __b)
729 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
732 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
733 vhadd_s32 (int32x2_t __a, int32x2_t __b)
735 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
738 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
739 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
741 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
742 (int8x8_t) __b);
745 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
746 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
748 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
749 (int16x4_t) __b);
752 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
753 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
755 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
756 (int32x2_t) __b);
759 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
760 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
762 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
765 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
766 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
768 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
771 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
772 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
774 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
777 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
778 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
780 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
781 (int8x16_t) __b);
784 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
785 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
787 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
788 (int16x8_t) __b);
791 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
792 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
794 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
795 (int32x4_t) __b);
798 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
799 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
801 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
804 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
805 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
807 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
810 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
811 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
813 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
816 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
817 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
819 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
820 (int8x8_t) __b);
823 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
824 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
826 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
827 (int16x4_t) __b);
830 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
831 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
833 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
834 (int32x2_t) __b);
837 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
838 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
840 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
843 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
844 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
846 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
849 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
850 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
852 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
855 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
856 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
858 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
859 (int8x16_t) __b);
862 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
863 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
865 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
866 (int16x8_t) __b);
869 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
870 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
872 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
873 (int32x4_t) __b);
876 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
877 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
879 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
882 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
883 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
885 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
888 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
889 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
891 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
894 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
895 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
897 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
898 (int16x8_t) __b);
901 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
902 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
904 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
905 (int32x4_t) __b);
908 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
909 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
911 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
912 (int64x2_t) __b);
915 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
916 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
918 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
921 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
922 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
924 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
927 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
928 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
930 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
933 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
934 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
936 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
937 (int16x8_t) __b);
940 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
941 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
943 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
944 (int32x4_t) __b);
947 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
948 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
950 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
951 (int64x2_t) __b);
954 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
955 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
957 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
960 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
961 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
963 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
966 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
967 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
969 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
972 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
973 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
975 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
976 (int16x8_t) __b,
977 (int16x8_t) __c);
980 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
981 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
983 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
984 (int32x4_t) __b,
985 (int32x4_t) __c);
988 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
989 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
991 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
992 (int64x2_t) __b,
993 (int64x2_t) __c);
996 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
997 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
999 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1002 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1003 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1005 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1008 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1009 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1011 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1014 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1015 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1017 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1018 (int16x8_t) __b,
1019 (int16x8_t) __c);
1022 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1023 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1025 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1026 (int32x4_t) __b,
1027 (int32x4_t) __c);
1030 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1031 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1033 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1034 (int64x2_t) __b,
1035 (int64x2_t) __c);
1038 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1039 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1041 return __a / __b;
1044 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1045 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1047 return __a / __b;
1050 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1051 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1053 return __a / __b;
1056 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1057 vmul_s8 (int8x8_t __a, int8x8_t __b)
1059 return __a * __b;
1062 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1063 vmul_s16 (int16x4_t __a, int16x4_t __b)
1065 return __a * __b;
1068 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1069 vmul_s32 (int32x2_t __a, int32x2_t __b)
1071 return __a * __b;
1074 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1075 vmul_f32 (float32x2_t __a, float32x2_t __b)
1077 return __a * __b;
1080 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1081 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1083 return __a * __b;
1086 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1087 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1089 return __a * __b;
1092 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1093 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1095 return __a * __b;
1098 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1099 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1101 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1102 (int8x8_t) __b);
1105 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1106 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1108 return __a * __b;
1111 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1112 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1114 return __a * __b;
1117 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1118 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1120 return __a * __b;
1123 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1124 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1126 return __a * __b;
1129 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1130 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1132 return __a * __b;
1135 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1136 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1138 return __a * __b;
1141 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1142 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1144 return __a * __b;
1147 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1148 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1150 return __a * __b;
1153 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1154 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1156 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1157 (int8x16_t) __b);
1160 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1161 vand_s8 (int8x8_t __a, int8x8_t __b)
1163 return __a & __b;
1166 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1167 vand_s16 (int16x4_t __a, int16x4_t __b)
1169 return __a & __b;
1172 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1173 vand_s32 (int32x2_t __a, int32x2_t __b)
1175 return __a & __b;
1178 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1179 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1181 return __a & __b;
1184 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1185 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1187 return __a & __b;
1190 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1191 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1193 return __a & __b;
1196 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1197 vand_s64 (int64x1_t __a, int64x1_t __b)
1199 return __a & __b;
1202 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1203 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1205 return __a & __b;
1208 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1209 vandq_s8 (int8x16_t __a, int8x16_t __b)
1211 return __a & __b;
1214 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1215 vandq_s16 (int16x8_t __a, int16x8_t __b)
1217 return __a & __b;
1220 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1221 vandq_s32 (int32x4_t __a, int32x4_t __b)
1223 return __a & __b;
1226 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1227 vandq_s64 (int64x2_t __a, int64x2_t __b)
1229 return __a & __b;
1232 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1233 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1235 return __a & __b;
1238 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1239 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1241 return __a & __b;
1244 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1245 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1247 return __a & __b;
1250 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1251 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1253 return __a & __b;
1256 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1257 vorr_s8 (int8x8_t __a, int8x8_t __b)
1259 return __a | __b;
1262 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1263 vorr_s16 (int16x4_t __a, int16x4_t __b)
1265 return __a | __b;
1268 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1269 vorr_s32 (int32x2_t __a, int32x2_t __b)
1271 return __a | __b;
1274 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1275 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1277 return __a | __b;
1280 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1281 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1283 return __a | __b;
1286 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1287 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1289 return __a | __b;
1292 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1293 vorr_s64 (int64x1_t __a, int64x1_t __b)
1295 return __a | __b;
1298 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1299 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1301 return __a | __b;
1304 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1305 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1307 return __a | __b;
1310 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1311 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1313 return __a | __b;
1316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1317 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1319 return __a | __b;
1322 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1323 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1325 return __a | __b;
1328 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1329 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1331 return __a | __b;
1334 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1335 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1337 return __a | __b;
1340 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1341 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1343 return __a | __b;
1346 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1347 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1349 return __a | __b;
1352 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1353 veor_s8 (int8x8_t __a, int8x8_t __b)
1355 return __a ^ __b;
1358 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1359 veor_s16 (int16x4_t __a, int16x4_t __b)
1361 return __a ^ __b;
1364 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1365 veor_s32 (int32x2_t __a, int32x2_t __b)
1367 return __a ^ __b;
1370 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1371 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1373 return __a ^ __b;
1376 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1377 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1379 return __a ^ __b;
1382 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1383 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1385 return __a ^ __b;
1388 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1389 veor_s64 (int64x1_t __a, int64x1_t __b)
1391 return __a ^ __b;
1394 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1395 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1397 return __a ^ __b;
1400 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1401 veorq_s8 (int8x16_t __a, int8x16_t __b)
1403 return __a ^ __b;
1406 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1407 veorq_s16 (int16x8_t __a, int16x8_t __b)
1409 return __a ^ __b;
1412 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1413 veorq_s32 (int32x4_t __a, int32x4_t __b)
1415 return __a ^ __b;
1418 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1419 veorq_s64 (int64x2_t __a, int64x2_t __b)
1421 return __a ^ __b;
1424 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1425 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1427 return __a ^ __b;
1430 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1431 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1433 return __a ^ __b;
1436 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1437 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1439 return __a ^ __b;
1442 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1443 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1445 return __a ^ __b;
1448 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1449 vbic_s8 (int8x8_t __a, int8x8_t __b)
1451 return __a & ~__b;
1454 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1455 vbic_s16 (int16x4_t __a, int16x4_t __b)
1457 return __a & ~__b;
1460 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1461 vbic_s32 (int32x2_t __a, int32x2_t __b)
1463 return __a & ~__b;
1466 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1467 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1469 return __a & ~__b;
1472 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1473 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1475 return __a & ~__b;
1478 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1479 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1481 return __a & ~__b;
1484 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1485 vbic_s64 (int64x1_t __a, int64x1_t __b)
1487 return __a & ~__b;
1490 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1491 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1493 return __a & ~__b;
1496 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1497 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1499 return __a & ~__b;
1502 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1503 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1505 return __a & ~__b;
1508 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1509 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1511 return __a & ~__b;
1514 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1515 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1517 return __a & ~__b;
1520 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1521 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1523 return __a & ~__b;
1526 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1527 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1529 return __a & ~__b;
1532 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1533 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1535 return __a & ~__b;
1538 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1539 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1541 return __a & ~__b;
1544 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1545 vorn_s8 (int8x8_t __a, int8x8_t __b)
1547 return __a | ~__b;
1550 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1551 vorn_s16 (int16x4_t __a, int16x4_t __b)
1553 return __a | ~__b;
1556 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1557 vorn_s32 (int32x2_t __a, int32x2_t __b)
1559 return __a | ~__b;
1562 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1563 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1565 return __a | ~__b;
1568 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1569 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1571 return __a | ~__b;
1574 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1575 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1577 return __a | ~__b;
1580 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1581 vorn_s64 (int64x1_t __a, int64x1_t __b)
1583 return __a | ~__b;
1586 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1587 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1589 return __a | ~__b;
1592 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1593 vornq_s8 (int8x16_t __a, int8x16_t __b)
1595 return __a | ~__b;
1598 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1599 vornq_s16 (int16x8_t __a, int16x8_t __b)
1601 return __a | ~__b;
1604 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1605 vornq_s32 (int32x4_t __a, int32x4_t __b)
1607 return __a | ~__b;
1610 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1611 vornq_s64 (int64x2_t __a, int64x2_t __b)
1613 return __a | ~__b;
1616 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1617 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1619 return __a | ~__b;
1622 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1623 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1625 return __a | ~__b;
1628 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1629 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1631 return __a | ~__b;
1634 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1635 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1637 return __a | ~__b;
1640 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1641 vsub_s8 (int8x8_t __a, int8x8_t __b)
1643 return __a - __b;
1646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1647 vsub_s16 (int16x4_t __a, int16x4_t __b)
1649 return __a - __b;
1652 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1653 vsub_s32 (int32x2_t __a, int32x2_t __b)
1655 return __a - __b;
1658 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1659 vsub_f32 (float32x2_t __a, float32x2_t __b)
1661 return __a - __b;
1664 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1665 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1667 return __a - __b;
1670 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1671 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1673 return __a - __b;
1676 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1677 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1679 return __a - __b;
1682 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1683 vsub_s64 (int64x1_t __a, int64x1_t __b)
1685 return __a - __b;
1688 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1689 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1691 return __a - __b;
1694 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1695 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1697 return __a - __b;
1700 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1701 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1703 return __a - __b;
1706 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1707 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1709 return __a - __b;
1712 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1713 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1715 return __a - __b;
1718 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1719 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1721 return __a - __b;
1724 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1725 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1727 return __a - __b;
1730 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1731 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1733 return __a - __b;
1736 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1737 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1739 return __a - __b;
1742 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1743 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1745 return __a - __b;
1748 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1749 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1751 return __a - __b;
1754 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1755 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1757 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1760 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1761 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1763 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1766 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1767 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1769 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1772 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1773 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1775 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1776 (int8x8_t) __b);
1779 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1780 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1782 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1783 (int16x4_t) __b);
1786 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1787 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1789 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1790 (int32x2_t) __b);
1793 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1794 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1796 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1799 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1800 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1802 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1805 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1806 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1808 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1811 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1812 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1814 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1815 (int8x16_t) __b);
1818 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1819 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
1821 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
1822 (int16x8_t) __b);
1825 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1826 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
1828 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
1829 (int32x4_t) __b);
1832 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1833 vsubw_s8 (int16x8_t __a, int8x8_t __b)
1835 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
1838 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1839 vsubw_s16 (int32x4_t __a, int16x4_t __b)
1841 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
1844 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1845 vsubw_s32 (int64x2_t __a, int32x2_t __b)
1847 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
1850 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1851 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
1853 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
1854 (int8x8_t) __b);
1857 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1858 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
1860 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
1861 (int16x4_t) __b);
1864 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1865 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
1867 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
1868 (int32x2_t) __b);
1871 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1872 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
1874 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
1877 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1878 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
1880 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
1883 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1884 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
1886 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
1889 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1890 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
1892 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
1893 (int8x16_t) __b);
1896 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1897 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
1899 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
1900 (int16x8_t) __b);
1903 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1904 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
1906 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
1907 (int32x4_t) __b);
1910 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1911 vqadd_s8 (int8x8_t __a, int8x8_t __b)
1913 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
1916 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1917 vqadd_s16 (int16x4_t __a, int16x4_t __b)
1919 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
1922 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1923 vqadd_s32 (int32x2_t __a, int32x2_t __b)
1925 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
1928 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1929 vqadd_s64 (int64x1_t __a, int64x1_t __b)
1931 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
1934 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1935 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
1937 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
1938 (int8x8_t) __b);
1941 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1942 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
1944 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
1945 (int16x4_t) __b);
1948 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1949 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
1951 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
1952 (int32x2_t) __b);
1955 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1956 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
1958 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
1959 (int64x1_t) __b);
1962 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1963 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
1965 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
1968 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1969 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
1971 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
1974 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1975 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
1977 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
1980 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1981 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
1983 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
1986 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1987 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1989 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
1990 (int8x16_t) __b);
1993 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1994 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1996 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
1997 (int16x8_t) __b);
2000 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2001 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2003 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2004 (int32x4_t) __b);
2007 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2008 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2010 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2011 (int64x2_t) __b);
2014 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2015 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2017 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2020 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2021 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2023 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2026 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2027 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2029 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2032 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2033 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2035 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2038 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2039 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2041 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2042 (int8x8_t) __b);
2045 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2046 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2048 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2049 (int16x4_t) __b);
2052 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2053 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2055 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2056 (int32x2_t) __b);
2059 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2060 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2062 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2063 (int64x1_t) __b);
2066 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2067 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2069 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2072 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2073 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2075 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2078 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2079 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2081 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2084 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2085 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2087 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2090 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2091 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2093 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2094 (int8x16_t) __b);
2097 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2098 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2100 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2101 (int16x8_t) __b);
2104 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2105 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2107 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2108 (int32x4_t) __b);
2111 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2112 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2114 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2115 (int64x2_t) __b);
2118 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2119 vqneg_s8 (int8x8_t __a)
2121 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2124 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2125 vqneg_s16 (int16x4_t __a)
2127 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2130 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2131 vqneg_s32 (int32x2_t __a)
2133 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2136 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2137 vqnegq_s8 (int8x16_t __a)
2139 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2142 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2143 vqnegq_s16 (int16x8_t __a)
2145 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2148 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2149 vqnegq_s32 (int32x4_t __a)
2151 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2154 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2155 vqabs_s8 (int8x8_t __a)
2157 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2160 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2161 vqabs_s16 (int16x4_t __a)
2163 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2166 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2167 vqabs_s32 (int32x2_t __a)
2169 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2172 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2173 vqabsq_s8 (int8x16_t __a)
2175 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2178 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2179 vqabsq_s16 (int16x8_t __a)
2181 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2184 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2185 vqabsq_s32 (int32x4_t __a)
2187 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2190 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2191 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2193 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2196 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2197 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2199 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2202 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2203 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2205 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2208 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2209 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2211 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2214 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2215 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2217 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2220 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2221 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2223 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2226 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2227 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2229 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2232 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2233 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2235 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2238 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2239 vcreate_s8 (uint64_t __a)
2241 return (int8x8_t) __a;
2244 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2245 vcreate_s16 (uint64_t __a)
2247 return (int16x4_t) __a;
2250 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2251 vcreate_s32 (uint64_t __a)
2253 return (int32x2_t) __a;
2256 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2257 vcreate_s64 (uint64_t __a)
2259 return (int64x1_t) __a;
2262 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2263 vcreate_f32 (uint64_t __a)
2265 return (float32x2_t) __a;
2268 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2269 vcreate_u8 (uint64_t __a)
2271 return (uint8x8_t) __a;
2274 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2275 vcreate_u16 (uint64_t __a)
2277 return (uint16x4_t) __a;
2280 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2281 vcreate_u32 (uint64_t __a)
2283 return (uint32x2_t) __a;
2286 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2287 vcreate_u64 (uint64_t __a)
2289 return (uint64x1_t) __a;
2292 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2293 vcreate_f64 (uint64_t __a)
2295 return (float64x1_t) __builtin_aarch64_createdf (__a);
2298 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2299 vcreate_p8 (uint64_t __a)
2301 return (poly8x8_t) __a;
2304 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2305 vcreate_p16 (uint64_t __a)
2307 return (poly16x4_t) __a;
2310 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2311 vget_lane_s8 (int8x8_t __a, const int __b)
2313 return (int8_t) __builtin_aarch64_get_lane_signedv8qi (__a, __b);
2316 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2317 vget_lane_s16 (int16x4_t __a, const int __b)
2319 return (int16_t) __builtin_aarch64_get_lane_signedv4hi (__a, __b);
2322 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2323 vget_lane_s32 (int32x2_t __a, const int __b)
2325 return (int32_t) __builtin_aarch64_get_lane_signedv2si (__a, __b);
2328 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2329 vget_lane_f32 (float32x2_t __a, const int __b)
2331 return (float32_t) __builtin_aarch64_get_lanev2sf (__a, __b);
2334 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2335 vget_lane_u8 (uint8x8_t __a, const int __b)
2337 return (uint8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
2338 __b);
2341 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2342 vget_lane_u16 (uint16x4_t __a, const int __b)
2344 return (uint16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
2345 __b);
2348 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2349 vget_lane_u32 (uint32x2_t __a, const int __b)
2351 return (uint32_t) __builtin_aarch64_get_lane_unsignedv2si ((int32x2_t) __a,
2352 __b);
2355 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2356 vget_lane_p8 (poly8x8_t __a, const int __b)
2358 return (poly8_t) __builtin_aarch64_get_lane_unsignedv8qi ((int8x8_t) __a,
2359 __b);
2362 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2363 vget_lane_p16 (poly16x4_t __a, const int __b)
2365 return (poly16_t) __builtin_aarch64_get_lane_unsignedv4hi ((int16x4_t) __a,
2366 __b);
2369 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2370 vget_lane_s64 (int64x1_t __a, const int __b)
2372 return (int64_t) __builtin_aarch64_get_lanedi (__a, __b);
2375 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2376 vget_lane_u64 (uint64x1_t __a, const int __b)
2378 return (uint64_t) __builtin_aarch64_get_lanedi ((int64x1_t) __a, __b);
2381 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2382 vgetq_lane_s8 (int8x16_t __a, const int __b)
2384 return (int8_t) __builtin_aarch64_get_lane_signedv16qi (__a, __b);
2387 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2388 vgetq_lane_s16 (int16x8_t __a, const int __b)
2390 return (int16_t) __builtin_aarch64_get_lane_signedv8hi (__a, __b);
2393 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2394 vgetq_lane_s32 (int32x4_t __a, const int __b)
2396 return (int32_t) __builtin_aarch64_get_lane_signedv4si (__a, __b);
2399 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2400 vgetq_lane_f32 (float32x4_t __a, const int __b)
2402 return (float32_t) __builtin_aarch64_get_lanev4sf (__a, __b);
2405 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2406 vgetq_lane_f64 (float64x2_t __a, const int __b)
2408 return (float64_t) __builtin_aarch64_get_lanev2df (__a, __b);
2411 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2412 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2414 return (uint8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
2415 __b);
2418 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2419 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2421 return (uint16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
2422 __b);
2425 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2426 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2428 return (uint32_t) __builtin_aarch64_get_lane_unsignedv4si ((int32x4_t) __a,
2429 __b);
2432 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2433 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2435 return (poly8_t) __builtin_aarch64_get_lane_unsignedv16qi ((int8x16_t) __a,
2436 __b);
2439 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2440 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2442 return (poly16_t) __builtin_aarch64_get_lane_unsignedv8hi ((int16x8_t) __a,
2443 __b);
2446 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2447 vgetq_lane_s64 (int64x2_t __a, const int __b)
2449 return __builtin_aarch64_get_lane_unsignedv2di (__a, __b);
2452 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2453 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2455 return (uint64_t) __builtin_aarch64_get_lane_unsignedv2di ((int64x2_t) __a,
2456 __b);
2459 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2460 vreinterpret_p8_s8 (int8x8_t __a)
2462 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2465 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2466 vreinterpret_p8_s16 (int16x4_t __a)
2468 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2471 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2472 vreinterpret_p8_s32 (int32x2_t __a)
2474 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2477 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2478 vreinterpret_p8_s64 (int64x1_t __a)
2480 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2483 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2484 vreinterpret_p8_f32 (float32x2_t __a)
2486 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2489 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2490 vreinterpret_p8_u8 (uint8x8_t __a)
2492 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2495 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2496 vreinterpret_p8_u16 (uint16x4_t __a)
2498 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2501 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2502 vreinterpret_p8_u32 (uint32x2_t __a)
2504 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2507 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2508 vreinterpret_p8_u64 (uint64x1_t __a)
2510 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2513 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2514 vreinterpret_p8_p16 (poly16x4_t __a)
2516 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2519 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2520 vreinterpretq_p8_s8 (int8x16_t __a)
2522 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2525 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2526 vreinterpretq_p8_s16 (int16x8_t __a)
2528 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2531 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2532 vreinterpretq_p8_s32 (int32x4_t __a)
2534 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2537 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2538 vreinterpretq_p8_s64 (int64x2_t __a)
2540 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2543 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2544 vreinterpretq_p8_f32 (float32x4_t __a)
2546 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2549 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2550 vreinterpretq_p8_u8 (uint8x16_t __a)
2552 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2553 __a);
2556 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2557 vreinterpretq_p8_u16 (uint16x8_t __a)
2559 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2560 __a);
2563 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2564 vreinterpretq_p8_u32 (uint32x4_t __a)
2566 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2567 __a);
2570 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2571 vreinterpretq_p8_u64 (uint64x2_t __a)
2573 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2574 __a);
2577 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2578 vreinterpretq_p8_p16 (poly16x8_t __a)
2580 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2581 __a);
2584 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2585 vreinterpret_p16_s8 (int8x8_t __a)
2587 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2590 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2591 vreinterpret_p16_s16 (int16x4_t __a)
2593 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2596 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2597 vreinterpret_p16_s32 (int32x2_t __a)
2599 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2602 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2603 vreinterpret_p16_s64 (int64x1_t __a)
2605 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2608 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2609 vreinterpret_p16_f32 (float32x2_t __a)
2611 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2614 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2615 vreinterpret_p16_u8 (uint8x8_t __a)
2617 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2620 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2621 vreinterpret_p16_u16 (uint16x4_t __a)
2623 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2626 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2627 vreinterpret_p16_u32 (uint32x2_t __a)
2629 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2632 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2633 vreinterpret_p16_u64 (uint64x1_t __a)
2635 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2638 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2639 vreinterpret_p16_p8 (poly8x8_t __a)
2641 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2644 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2645 vreinterpretq_p16_s8 (int8x16_t __a)
2647 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2650 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2651 vreinterpretq_p16_s16 (int16x8_t __a)
2653 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2656 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2657 vreinterpretq_p16_s32 (int32x4_t __a)
2659 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2662 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2663 vreinterpretq_p16_s64 (int64x2_t __a)
2665 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2668 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2669 vreinterpretq_p16_f32 (float32x4_t __a)
2671 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2674 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2675 vreinterpretq_p16_u8 (uint8x16_t __a)
2677 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2678 __a);
2681 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2682 vreinterpretq_p16_u16 (uint16x8_t __a)
2684 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2687 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2688 vreinterpretq_p16_u32 (uint32x4_t __a)
2690 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2693 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2694 vreinterpretq_p16_u64 (uint64x2_t __a)
2696 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2699 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2700 vreinterpretq_p16_p8 (poly8x16_t __a)
2702 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2703 __a);
2706 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2707 vreinterpret_f32_s8 (int8x8_t __a)
2709 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2712 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2713 vreinterpret_f32_s16 (int16x4_t __a)
2715 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2718 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2719 vreinterpret_f32_s32 (int32x2_t __a)
2721 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2724 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2725 vreinterpret_f32_s64 (int64x1_t __a)
2727 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2730 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2731 vreinterpret_f32_u8 (uint8x8_t __a)
2733 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2736 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2737 vreinterpret_f32_u16 (uint16x4_t __a)
2739 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2740 __a);
2743 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2744 vreinterpret_f32_u32 (uint32x2_t __a)
2746 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2747 __a);
2750 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2751 vreinterpret_f32_u64 (uint64x1_t __a)
2753 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2756 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2757 vreinterpret_f32_p8 (poly8x8_t __a)
2759 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2762 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2763 vreinterpret_f32_p16 (poly16x4_t __a)
2765 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2766 __a);
2769 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2770 vreinterpretq_f32_s8 (int8x16_t __a)
2772 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2775 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2776 vreinterpretq_f32_s16 (int16x8_t __a)
2778 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2781 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2782 vreinterpretq_f32_s32 (int32x4_t __a)
2784 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2787 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2788 vreinterpretq_f32_s64 (int64x2_t __a)
2790 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2793 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2794 vreinterpretq_f32_u8 (uint8x16_t __a)
2796 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2797 __a);
2800 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2801 vreinterpretq_f32_u16 (uint16x8_t __a)
2803 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2804 __a);
2807 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2808 vreinterpretq_f32_u32 (uint32x4_t __a)
2810 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2811 __a);
2814 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2815 vreinterpretq_f32_u64 (uint64x2_t __a)
2817 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
2818 __a);
2821 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2822 vreinterpretq_f32_p8 (poly8x16_t __a)
2824 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2825 __a);
2828 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2829 vreinterpretq_f32_p16 (poly16x8_t __a)
2831 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2832 __a);
2835 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2836 vreinterpret_s64_s8 (int8x8_t __a)
2838 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
2841 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2842 vreinterpret_s64_s16 (int16x4_t __a)
2844 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
2847 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2848 vreinterpret_s64_s32 (int32x2_t __a)
2850 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
2853 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2854 vreinterpret_s64_f32 (float32x2_t __a)
2856 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
2859 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2860 vreinterpret_s64_u8 (uint8x8_t __a)
2862 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2865 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2866 vreinterpret_s64_u16 (uint16x4_t __a)
2868 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2871 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2872 vreinterpret_s64_u32 (uint32x2_t __a)
2874 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
2877 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2878 vreinterpret_s64_u64 (uint64x1_t __a)
2880 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
2883 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2884 vreinterpret_s64_p8 (poly8x8_t __a)
2886 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2889 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2890 vreinterpret_s64_p16 (poly16x4_t __a)
2892 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2895 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2896 vreinterpretq_s64_s8 (int8x16_t __a)
2898 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
2901 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2902 vreinterpretq_s64_s16 (int16x8_t __a)
2904 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
2907 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2908 vreinterpretq_s64_s32 (int32x4_t __a)
2910 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
2913 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2914 vreinterpretq_s64_f32 (float32x4_t __a)
2916 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
2919 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2920 vreinterpretq_s64_u8 (uint8x16_t __a)
2922 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
2925 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2926 vreinterpretq_s64_u16 (uint16x8_t __a)
2928 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
2931 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2932 vreinterpretq_s64_u32 (uint32x4_t __a)
2934 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
2937 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2938 vreinterpretq_s64_u64 (uint64x2_t __a)
2940 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
2943 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2944 vreinterpretq_s64_p8 (poly8x16_t __a)
2946 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
2949 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2950 vreinterpretq_s64_p16 (poly16x8_t __a)
2952 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
2955 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2956 vreinterpret_u64_s8 (int8x8_t __a)
2958 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
2961 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2962 vreinterpret_u64_s16 (int16x4_t __a)
2964 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
2967 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2968 vreinterpret_u64_s32 (int32x2_t __a)
2970 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
2973 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2974 vreinterpret_u64_s64 (int64x1_t __a)
2976 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
2979 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2980 vreinterpret_u64_f32 (float32x2_t __a)
2982 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
2985 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2986 vreinterpret_u64_u8 (uint8x8_t __a)
2988 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2991 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2992 vreinterpret_u64_u16 (uint16x4_t __a)
2994 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2997 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2998 vreinterpret_u64_u32 (uint32x2_t __a)
3000 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3003 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3004 vreinterpret_u64_p8 (poly8x8_t __a)
3006 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3010 vreinterpret_u64_p16 (poly16x4_t __a)
3012 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3015 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3016 vreinterpretq_u64_s8 (int8x16_t __a)
3018 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3021 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3022 vreinterpretq_u64_s16 (int16x8_t __a)
3024 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3027 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3028 vreinterpretq_u64_s32 (int32x4_t __a)
3030 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3033 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3034 vreinterpretq_u64_s64 (int64x2_t __a)
3036 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3039 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3040 vreinterpretq_u64_f32 (float32x4_t __a)
3042 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3045 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3046 vreinterpretq_u64_u8 (uint8x16_t __a)
3048 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3049 __a);
3052 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3053 vreinterpretq_u64_u16 (uint16x8_t __a)
3055 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3058 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3059 vreinterpretq_u64_u32 (uint32x4_t __a)
3061 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3064 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3065 vreinterpretq_u64_p8 (poly8x16_t __a)
3067 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3068 __a);
3071 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3072 vreinterpretq_u64_p16 (poly16x8_t __a)
3074 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3077 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3078 vreinterpret_s8_s16 (int16x4_t __a)
3080 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3083 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3084 vreinterpret_s8_s32 (int32x2_t __a)
3086 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3089 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3090 vreinterpret_s8_s64 (int64x1_t __a)
3092 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3096 vreinterpret_s8_f32 (float32x2_t __a)
3098 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3101 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3102 vreinterpret_s8_u8 (uint8x8_t __a)
3104 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3107 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3108 vreinterpret_s8_u16 (uint16x4_t __a)
3110 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3113 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3114 vreinterpret_s8_u32 (uint32x2_t __a)
3116 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3119 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3120 vreinterpret_s8_u64 (uint64x1_t __a)
3122 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3125 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3126 vreinterpret_s8_p8 (poly8x8_t __a)
3128 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3131 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3132 vreinterpret_s8_p16 (poly16x4_t __a)
3134 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3137 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3138 vreinterpretq_s8_s16 (int16x8_t __a)
3140 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3143 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3144 vreinterpretq_s8_s32 (int32x4_t __a)
3146 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3149 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3150 vreinterpretq_s8_s64 (int64x2_t __a)
3152 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3155 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3156 vreinterpretq_s8_f32 (float32x4_t __a)
3158 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3161 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3162 vreinterpretq_s8_u8 (uint8x16_t __a)
3164 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3165 __a);
3168 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3169 vreinterpretq_s8_u16 (uint16x8_t __a)
3171 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3174 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3175 vreinterpretq_s8_u32 (uint32x4_t __a)
3177 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3180 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3181 vreinterpretq_s8_u64 (uint64x2_t __a)
3183 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3186 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3187 vreinterpretq_s8_p8 (poly8x16_t __a)
3189 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3190 __a);
3193 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3194 vreinterpretq_s8_p16 (poly16x8_t __a)
3196 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3199 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3200 vreinterpret_s16_s8 (int8x8_t __a)
3202 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3205 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3206 vreinterpret_s16_s32 (int32x2_t __a)
3208 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3211 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3212 vreinterpret_s16_s64 (int64x1_t __a)
3214 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3217 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3218 vreinterpret_s16_f32 (float32x2_t __a)
3220 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3223 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3224 vreinterpret_s16_u8 (uint8x8_t __a)
3226 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3229 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3230 vreinterpret_s16_u16 (uint16x4_t __a)
3232 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3235 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3236 vreinterpret_s16_u32 (uint32x2_t __a)
3238 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3241 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3242 vreinterpret_s16_u64 (uint64x1_t __a)
3244 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3247 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3248 vreinterpret_s16_p8 (poly8x8_t __a)
3250 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3253 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3254 vreinterpret_s16_p16 (poly16x4_t __a)
3256 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3259 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3260 vreinterpretq_s16_s8 (int8x16_t __a)
3262 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3265 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3266 vreinterpretq_s16_s32 (int32x4_t __a)
3268 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3271 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3272 vreinterpretq_s16_s64 (int64x2_t __a)
3274 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3277 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3278 vreinterpretq_s16_f32 (float32x4_t __a)
3280 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3283 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3284 vreinterpretq_s16_u8 (uint8x16_t __a)
3286 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3289 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3290 vreinterpretq_s16_u16 (uint16x8_t __a)
3292 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3295 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3296 vreinterpretq_s16_u32 (uint32x4_t __a)
3298 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3301 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3302 vreinterpretq_s16_u64 (uint64x2_t __a)
3304 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3307 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3308 vreinterpretq_s16_p8 (poly8x16_t __a)
3310 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3313 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3314 vreinterpretq_s16_p16 (poly16x8_t __a)
3316 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3319 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3320 vreinterpret_s32_s8 (int8x8_t __a)
3322 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3325 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3326 vreinterpret_s32_s16 (int16x4_t __a)
3328 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3331 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3332 vreinterpret_s32_s64 (int64x1_t __a)
3334 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3337 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3338 vreinterpret_s32_f32 (float32x2_t __a)
3340 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3343 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3344 vreinterpret_s32_u8 (uint8x8_t __a)
3346 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3349 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3350 vreinterpret_s32_u16 (uint16x4_t __a)
3352 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3355 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3356 vreinterpret_s32_u32 (uint32x2_t __a)
3358 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3361 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3362 vreinterpret_s32_u64 (uint64x1_t __a)
3364 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3367 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3368 vreinterpret_s32_p8 (poly8x8_t __a)
3370 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3373 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3374 vreinterpret_s32_p16 (poly16x4_t __a)
3376 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3379 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3380 vreinterpretq_s32_s8 (int8x16_t __a)
3382 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3385 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3386 vreinterpretq_s32_s16 (int16x8_t __a)
3388 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3391 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3392 vreinterpretq_s32_s64 (int64x2_t __a)
3394 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3397 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3398 vreinterpretq_s32_f32 (float32x4_t __a)
3400 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3403 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3404 vreinterpretq_s32_u8 (uint8x16_t __a)
3406 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3409 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3410 vreinterpretq_s32_u16 (uint16x8_t __a)
3412 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3415 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3416 vreinterpretq_s32_u32 (uint32x4_t __a)
3418 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3421 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3422 vreinterpretq_s32_u64 (uint64x2_t __a)
3424 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3427 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3428 vreinterpretq_s32_p8 (poly8x16_t __a)
3430 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3433 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3434 vreinterpretq_s32_p16 (poly16x8_t __a)
3436 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3439 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3440 vreinterpret_u8_s8 (int8x8_t __a)
3442 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3445 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3446 vreinterpret_u8_s16 (int16x4_t __a)
3448 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3451 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3452 vreinterpret_u8_s32 (int32x2_t __a)
3454 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3458 vreinterpret_u8_s64 (int64x1_t __a)
3460 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3463 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3464 vreinterpret_u8_f32 (float32x2_t __a)
3466 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3469 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3470 vreinterpret_u8_u16 (uint16x4_t __a)
3472 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3475 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3476 vreinterpret_u8_u32 (uint32x2_t __a)
3478 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3481 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3482 vreinterpret_u8_u64 (uint64x1_t __a)
3484 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3487 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3488 vreinterpret_u8_p8 (poly8x8_t __a)
3490 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3493 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3494 vreinterpret_u8_p16 (poly16x4_t __a)
3496 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3499 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3500 vreinterpretq_u8_s8 (int8x16_t __a)
3502 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3505 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3506 vreinterpretq_u8_s16 (int16x8_t __a)
3508 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3511 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3512 vreinterpretq_u8_s32 (int32x4_t __a)
3514 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3517 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3518 vreinterpretq_u8_s64 (int64x2_t __a)
3520 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3523 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3524 vreinterpretq_u8_f32 (float32x4_t __a)
3526 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3529 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3530 vreinterpretq_u8_u16 (uint16x8_t __a)
3532 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3533 __a);
3536 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3537 vreinterpretq_u8_u32 (uint32x4_t __a)
3539 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3540 __a);
3543 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3544 vreinterpretq_u8_u64 (uint64x2_t __a)
3546 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3547 __a);
3550 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3551 vreinterpretq_u8_p8 (poly8x16_t __a)
3553 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3554 __a);
3557 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3558 vreinterpretq_u8_p16 (poly16x8_t __a)
3560 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3561 __a);
3564 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3565 vreinterpret_u16_s8 (int8x8_t __a)
3567 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3570 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3571 vreinterpret_u16_s16 (int16x4_t __a)
3573 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3576 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3577 vreinterpret_u16_s32 (int32x2_t __a)
3579 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3582 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3583 vreinterpret_u16_s64 (int64x1_t __a)
3585 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3588 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3589 vreinterpret_u16_f32 (float32x2_t __a)
3591 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3594 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3595 vreinterpret_u16_u8 (uint8x8_t __a)
3597 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3600 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3601 vreinterpret_u16_u32 (uint32x2_t __a)
3603 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3606 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3607 vreinterpret_u16_u64 (uint64x1_t __a)
3609 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3612 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3613 vreinterpret_u16_p8 (poly8x8_t __a)
3615 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3618 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3619 vreinterpret_u16_p16 (poly16x4_t __a)
3621 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3624 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3625 vreinterpretq_u16_s8 (int8x16_t __a)
3627 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3630 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3631 vreinterpretq_u16_s16 (int16x8_t __a)
3633 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3637 vreinterpretq_u16_s32 (int32x4_t __a)
3639 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3642 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3643 vreinterpretq_u16_s64 (int64x2_t __a)
3645 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3648 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3649 vreinterpretq_u16_f32 (float32x4_t __a)
3651 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3654 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3655 vreinterpretq_u16_u8 (uint8x16_t __a)
3657 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3658 __a);
3661 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3662 vreinterpretq_u16_u32 (uint32x4_t __a)
3664 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3667 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3668 vreinterpretq_u16_u64 (uint64x2_t __a)
3670 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3673 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3674 vreinterpretq_u16_p8 (poly8x16_t __a)
3676 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3677 __a);
3680 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3681 vreinterpretq_u16_p16 (poly16x8_t __a)
3683 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3686 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3687 vreinterpret_u32_s8 (int8x8_t __a)
3689 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3692 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3693 vreinterpret_u32_s16 (int16x4_t __a)
3695 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3698 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3699 vreinterpret_u32_s32 (int32x2_t __a)
3701 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3704 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3705 vreinterpret_u32_s64 (int64x1_t __a)
3707 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3710 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3711 vreinterpret_u32_f32 (float32x2_t __a)
3713 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3716 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3717 vreinterpret_u32_u8 (uint8x8_t __a)
3719 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3722 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3723 vreinterpret_u32_u16 (uint16x4_t __a)
3725 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3728 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3729 vreinterpret_u32_u64 (uint64x1_t __a)
3731 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3734 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3735 vreinterpret_u32_p8 (poly8x8_t __a)
3737 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3740 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3741 vreinterpret_u32_p16 (poly16x4_t __a)
3743 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3747 vreinterpretq_u32_s8 (int8x16_t __a)
3749 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3752 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3753 vreinterpretq_u32_s16 (int16x8_t __a)
3755 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3758 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3759 vreinterpretq_u32_s32 (int32x4_t __a)
3761 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3764 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3765 vreinterpretq_u32_s64 (int64x2_t __a)
3767 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3770 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3771 vreinterpretq_u32_f32 (float32x4_t __a)
3773 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3776 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3777 vreinterpretq_u32_u8 (uint8x16_t __a)
3779 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3780 __a);
3783 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3784 vreinterpretq_u32_u16 (uint16x8_t __a)
3786 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3789 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3790 vreinterpretq_u32_u64 (uint64x2_t __a)
3792 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3795 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3796 vreinterpretq_u32_p8 (poly8x16_t __a)
3798 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3799 __a);
3802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3803 vreinterpretq_u32_p16 (poly16x8_t __a)
3805 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3808 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3809 vcombine_s8 (int8x8_t __a, int8x8_t __b)
3811 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
3814 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3815 vcombine_s16 (int16x4_t __a, int16x4_t __b)
3817 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
3820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3821 vcombine_s32 (int32x2_t __a, int32x2_t __b)
3823 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
3826 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3827 vcombine_s64 (int64x1_t __a, int64x1_t __b)
3829 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
3832 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3833 vcombine_f32 (float32x2_t __a, float32x2_t __b)
3835 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
3838 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3839 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
3841 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3842 (int8x8_t) __b);
3845 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3846 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
3848 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3849 (int16x4_t) __b);
3852 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3853 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
3855 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
3856 (int32x2_t) __b);
3859 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3860 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
3862 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
3863 (int64x1_t) __b);
3866 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
3867 vcombine_f64 (float64x1_t __a, float64x1_t __b)
3869 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
3872 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3873 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
3875 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3876 (int8x8_t) __b);
3879 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3880 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
3882 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3883 (int16x4_t) __b);
/* Start of temporary inline asm implementations.  */
3888 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3889 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
3891 int8x8_t result;
3892 __asm__ ("saba %0.8b,%2.8b,%3.8b"
3893 : "=w"(result)
3894 : "0"(a), "w"(b), "w"(c)
3895 : /* No clobbers */);
3896 return result;
3899 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3900 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
3902 int16x4_t result;
3903 __asm__ ("saba %0.4h,%2.4h,%3.4h"
3904 : "=w"(result)
3905 : "0"(a), "w"(b), "w"(c)
3906 : /* No clobbers */);
3907 return result;
3910 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3911 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
3913 int32x2_t result;
3914 __asm__ ("saba %0.2s,%2.2s,%3.2s"
3915 : "=w"(result)
3916 : "0"(a), "w"(b), "w"(c)
3917 : /* No clobbers */);
3918 return result;
3921 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3922 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
3924 uint8x8_t result;
3925 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
3926 : "=w"(result)
3927 : "0"(a), "w"(b), "w"(c)
3928 : /* No clobbers */);
3929 return result;
3932 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3933 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
3935 uint16x4_t result;
3936 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
3937 : "=w"(result)
3938 : "0"(a), "w"(b), "w"(c)
3939 : /* No clobbers */);
3940 return result;
3943 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3944 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
3946 uint32x2_t result;
3947 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
3948 : "=w"(result)
3949 : "0"(a), "w"(b), "w"(c)
3950 : /* No clobbers */);
3951 return result;
3954 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3955 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
3957 int16x8_t result;
3958 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
3959 : "=w"(result)
3960 : "0"(a), "w"(b), "w"(c)
3961 : /* No clobbers */);
3962 return result;
3965 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3966 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
3968 int32x4_t result;
3969 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
3970 : "=w"(result)
3971 : "0"(a), "w"(b), "w"(c)
3972 : /* No clobbers */);
3973 return result;
3976 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3977 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
3979 int64x2_t result;
3980 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
3981 : "=w"(result)
3982 : "0"(a), "w"(b), "w"(c)
3983 : /* No clobbers */);
3984 return result;
3987 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3988 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
3990 uint16x8_t result;
3991 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
3992 : "=w"(result)
3993 : "0"(a), "w"(b), "w"(c)
3994 : /* No clobbers */);
3995 return result;
3998 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3999 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4001 uint32x4_t result;
4002 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4003 : "=w"(result)
4004 : "0"(a), "w"(b), "w"(c)
4005 : /* No clobbers */);
4006 return result;
4009 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4010 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4012 uint64x2_t result;
4013 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4014 : "=w"(result)
4015 : "0"(a), "w"(b), "w"(c)
4016 : /* No clobbers */);
4017 return result;
4020 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4021 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4023 int16x8_t result;
4024 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4025 : "=w"(result)
4026 : "0"(a), "w"(b), "w"(c)
4027 : /* No clobbers */);
4028 return result;
4031 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4032 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4034 int32x4_t result;
4035 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4036 : "=w"(result)
4037 : "0"(a), "w"(b), "w"(c)
4038 : /* No clobbers */);
4039 return result;
4042 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4043 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4045 int64x2_t result;
4046 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4047 : "=w"(result)
4048 : "0"(a), "w"(b), "w"(c)
4049 : /* No clobbers */);
4050 return result;
4053 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4054 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4056 uint16x8_t result;
4057 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4058 : "=w"(result)
4059 : "0"(a), "w"(b), "w"(c)
4060 : /* No clobbers */);
4061 return result;
4064 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4065 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4067 uint32x4_t result;
4068 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4069 : "=w"(result)
4070 : "0"(a), "w"(b), "w"(c)
4071 : /* No clobbers */);
4072 return result;
4075 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4076 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4078 uint64x2_t result;
4079 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4080 : "=w"(result)
4081 : "0"(a), "w"(b), "w"(c)
4082 : /* No clobbers */);
4083 return result;
4086 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4087 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4089 int8x16_t result;
4090 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4091 : "=w"(result)
4092 : "0"(a), "w"(b), "w"(c)
4093 : /* No clobbers */);
4094 return result;
4097 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4098 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4100 int16x8_t result;
4101 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4102 : "=w"(result)
4103 : "0"(a), "w"(b), "w"(c)
4104 : /* No clobbers */);
4105 return result;
4108 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4109 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4111 int32x4_t result;
4112 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4113 : "=w"(result)
4114 : "0"(a), "w"(b), "w"(c)
4115 : /* No clobbers */);
4116 return result;
4119 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4120 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4122 uint8x16_t result;
4123 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4124 : "=w"(result)
4125 : "0"(a), "w"(b), "w"(c)
4126 : /* No clobbers */);
4127 return result;
4130 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4131 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4133 uint16x8_t result;
4134 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4135 : "=w"(result)
4136 : "0"(a), "w"(b), "w"(c)
4137 : /* No clobbers */);
4138 return result;
4141 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4142 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4144 uint32x4_t result;
4145 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4146 : "=w"(result)
4147 : "0"(a), "w"(b), "w"(c)
4148 : /* No clobbers */);
4149 return result;
4152 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4153 vabd_f32 (float32x2_t a, float32x2_t b)
4155 float32x2_t result;
4156 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4157 : "=w"(result)
4158 : "w"(a), "w"(b)
4159 : /* No clobbers */);
4160 return result;
4163 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4164 vabd_s8 (int8x8_t a, int8x8_t b)
4166 int8x8_t result;
4167 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4168 : "=w"(result)
4169 : "w"(a), "w"(b)
4170 : /* No clobbers */);
4171 return result;
4174 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4175 vabd_s16 (int16x4_t a, int16x4_t b)
4177 int16x4_t result;
4178 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4179 : "=w"(result)
4180 : "w"(a), "w"(b)
4181 : /* No clobbers */);
4182 return result;
4185 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4186 vabd_s32 (int32x2_t a, int32x2_t b)
4188 int32x2_t result;
4189 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4190 : "=w"(result)
4191 : "w"(a), "w"(b)
4192 : /* No clobbers */);
4193 return result;
4196 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4197 vabd_u8 (uint8x8_t a, uint8x8_t b)
4199 uint8x8_t result;
4200 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4201 : "=w"(result)
4202 : "w"(a), "w"(b)
4203 : /* No clobbers */);
4204 return result;
4207 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4208 vabd_u16 (uint16x4_t a, uint16x4_t b)
4210 uint16x4_t result;
4211 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4212 : "=w"(result)
4213 : "w"(a), "w"(b)
4214 : /* No clobbers */);
4215 return result;
4218 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4219 vabd_u32 (uint32x2_t a, uint32x2_t b)
4221 uint32x2_t result;
4222 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4223 : "=w"(result)
4224 : "w"(a), "w"(b)
4225 : /* No clobbers */);
4226 return result;
4229 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4230 vabdd_f64 (float64_t a, float64_t b)
4232 float64_t result;
4233 __asm__ ("fabd %d0, %d1, %d2"
4234 : "=w"(result)
4235 : "w"(a), "w"(b)
4236 : /* No clobbers */);
4237 return result;
4240 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4241 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4243 int16x8_t result;
4244 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4245 : "=w"(result)
4246 : "w"(a), "w"(b)
4247 : /* No clobbers */);
4248 return result;
4251 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4252 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4254 int32x4_t result;
4255 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4256 : "=w"(result)
4257 : "w"(a), "w"(b)
4258 : /* No clobbers */);
4259 return result;
4262 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4263 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4265 int64x2_t result;
4266 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4267 : "=w"(result)
4268 : "w"(a), "w"(b)
4269 : /* No clobbers */);
4270 return result;
4273 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4274 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4276 uint16x8_t result;
4277 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4278 : "=w"(result)
4279 : "w"(a), "w"(b)
4280 : /* No clobbers */);
4281 return result;
4284 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4285 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4287 uint32x4_t result;
4288 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4289 : "=w"(result)
4290 : "w"(a), "w"(b)
4291 : /* No clobbers */);
4292 return result;
4295 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4296 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4298 uint64x2_t result;
4299 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4300 : "=w"(result)
4301 : "w"(a), "w"(b)
4302 : /* No clobbers */);
4303 return result;
4306 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4307 vabdl_s8 (int8x8_t a, int8x8_t b)
4309 int16x8_t result;
4310 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4311 : "=w"(result)
4312 : "w"(a), "w"(b)
4313 : /* No clobbers */);
4314 return result;
4317 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4318 vabdl_s16 (int16x4_t a, int16x4_t b)
4320 int32x4_t result;
4321 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4322 : "=w"(result)
4323 : "w"(a), "w"(b)
4324 : /* No clobbers */);
4325 return result;
4328 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4329 vabdl_s32 (int32x2_t a, int32x2_t b)
4331 int64x2_t result;
4332 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4333 : "=w"(result)
4334 : "w"(a), "w"(b)
4335 : /* No clobbers */);
4336 return result;
4339 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4340 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4342 uint16x8_t result;
4343 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4344 : "=w"(result)
4345 : "w"(a), "w"(b)
4346 : /* No clobbers */);
4347 return result;
4350 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4351 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4353 uint32x4_t result;
4354 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4355 : "=w"(result)
4356 : "w"(a), "w"(b)
4357 : /* No clobbers */);
4358 return result;
4361 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4362 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4364 uint64x2_t result;
4365 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4366 : "=w"(result)
4367 : "w"(a), "w"(b)
4368 : /* No clobbers */);
4369 return result;
/* 128-bit vector absolute difference.  Each wrapper emits a single
   FABD (float), SABD (signed) or UABD (unsigned) on the lane arrangement
   named in the asm template; operands stay in SIMD registers via "w" and
   nothing else is clobbered.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabdq_f32 (float32x4_t a, float32x4_t b)
{
  float32x4_t result;
  __asm__ ("fabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabdq_f64 (float64x2_t a, float64x2_t b)
{
  float64x2_t result;
  __asm__ ("fabd %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabdq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("sabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabdq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("sabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabdq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("sabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vabdq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uabd %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vabdq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uabd %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vabdq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uabd %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Scalar FP absolute difference: FABD Sd, Sn, Sm on the S (32-bit)
   sub-registers of the SIMD register file.  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vabds_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fabd %s0, %s1, %s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* 64-bit vector integer absolute value: a single ABS on the named lane
   arrangement.  No clobbers beyond the output register.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("abs %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("abs %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("abs %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* 128-bit vector integer absolute value: a single ABS on the named lane
   arrangement.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("abs %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("abs %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("abs %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64 (int64x2_t a)
{
  int64x2_t result;
  __asm__ ("abs %0.2d,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Scalar FP absolute compare (FACGE / FACGT on D and S sub-registers).
   The instruction produces an all-ones / all-zeros mask in the destination.
   NOTE(review): these return float64_t/float32_t even though the result is
   an integer mask; ACLE specifies unsigned integer returns for the v*ca*
   scalar compares (uint64_t/uint32_t) — flagged but left unchanged here
   since changing the return type would alter the public interface.  */

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vacged_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("facge %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vacges_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("facge %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vacgtd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("facgt %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vacgts_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("facgt %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Widening add-across-lanes reductions: SADDLV/UADDLV reduce every lane of
   the source vector into a single scalar of twice the element width, written
   to the B/H/S/D sub-register named in the template.  */

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlv_s8 (int8x8_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlv_s16 (int16x4_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlv_u8 (uint8x8_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlv_u16 (uint16x4_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddlvq_s8 (int8x16_t a)
{
  int16_t result;
  __asm__ ("saddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddlvq_s16 (int16x8_t a)
{
  int32_t result;
  __asm__ ("saddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vaddlvq_s32 (int32x4_t a)
{
  int64_t result;
  __asm__ ("saddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddlvq_u8 (uint8x16_t a)
{
  uint16_t result;
  __asm__ ("uaddlv %h0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddlvq_u16 (uint16x8_t a)
{
  uint32_t result;
  __asm__ ("uaddlv %s0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vaddlvq_u32 (uint32x4_t a)
{
  uint64_t result;
  __asm__ ("uaddlv %d0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Non-widening add-across-lanes reductions: ADDV sums all lanes of the
   source vector into a scalar of the same element width.  */

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddv_s8 (int8x8_t a)
{
  int8_t result;
  __asm__ ("addv %b0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddv_s16 (int16x4_t a)
{
  int16_t result;
  __asm__ ("addv %h0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddv_u8 (uint8x8_t a)
{
  uint8_t result;
  __asm__ ("addv %b0,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddv_u16 (uint16x4_t a)
{
  uint16_t result;
  __asm__ ("addv %h0,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vaddvq_s8 (int8x16_t a)
{
  int8_t result;
  __asm__ ("addv %b0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vaddvq_s16 (int16x8_t a)
{
  int16_t result;
  __asm__ ("addv %h0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vaddvq_s32 (int32x4_t a)
{
  int32_t result;
  __asm__ ("addv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vaddvq_u8 (uint8x16_t a)
{
  uint8_t result;
  __asm__ ("addv %b0,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vaddvq_u16 (uint16x8_t a)
{
  uint16_t result;
  __asm__ ("addv %h0,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vaddvq_u32 (uint32x4_t a)
{
  uint32_t result;
  __asm__ ("addv %s0,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* 64-bit bitwise select: BSL keeps bits of the second source where the
   destination (mask) bit is set, bits of the third source where clear.
   BSL reads and writes its destination register, so the mask A is tied to
   the output with the "0" matching constraint; B and C are the selected
   sources.  The operation is bitwise, hence the fixed .8b arrangement for
   every element type.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
{
  float32x2_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vbsl_p8 (uint8x8_t a, poly8x8_t b, poly8x8_t c)
{
  poly8x8_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vbsl_p16 (uint16x4_t a, poly16x4_t b, poly16x4_t c)
{
  poly16x4_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vbsl_s8 (uint8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vbsl_s16 (uint16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vbsl_s32 (uint32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vbsl_s64 (uint64x1_t a, int64x1_t b, int64x1_t c)
{
  int64x1_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vbsl_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vbsl_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vbsl_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vbsl_u64 (uint64x1_t a, uint64x1_t b, uint64x1_t c)
{
  uint64x1_t result;
  __asm__ ("bsl %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* 128-bit bitwise select: same pattern as the 64-bit vbsl_* wrappers above
   but on the full .16b arrangement.  The mask A is tied to the output via
   the "0" matching constraint because BSL reads its destination.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c)
{
  float32x4_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vbslq_f64 (uint64x2_t a, float64x2_t b, float64x2_t c)
{
  float64x2_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vbslq_p8 (uint8x16_t a, poly8x16_t b, poly8x16_t c)
{
  poly8x16_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vbslq_p16 (uint16x8_t a, poly16x8_t b, poly16x8_t c)
{
  poly16x8_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vbslq_s8 (uint8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vbslq_s16 (uint16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vbslq_s32 (uint32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vbslq_s64 (uint64x2_t a, int64x2_t b, int64x2_t c)
{
  int64x2_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vbslq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vbslq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vbslq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vbslq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
{
  uint64x2_t result;
  __asm__ ("bsl %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Vector FP absolute compares, producing per-lane all-ones/all-zeros masks
   in an unsigned vector.  Only FACGE and FACGT exist in the ISA, so the
   "less" forms (vcale*, vcalt*) swap the source operands in the asm
   template (%2 before %1): |a| <= |b| is computed as FACGE b, a, and
   |a| < |b| as FACGT b, a.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcage_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("facge %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcageq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facge %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcageq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facge %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcagt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("facgt %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcagtq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facgt %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcagtq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facgt %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

/* Operands swapped below: LE/LT via FACGE/FACGT with reversed sources.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcale_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("facge %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaleq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facge %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaleq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facge %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcalt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("facgt %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaltq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("facgt %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaltq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("facgt %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* FP equality compares via FCMEQ, producing all-ones/all-zeros masks.
   Vector forms return unsigned mask vectors.  NOTE(review): the scalar
   forms (vceqd_f64, vceqs_f32, vceqzd_f64, vceqzs_f32) return float types
   although FCMEQ writes an integer mask; ACLE specifies unsigned integer
   returns for these — flagged but left unchanged to preserve the public
   interface.  The vceqz* forms compare against the immediate zero form of
   FCMEQ ("#0").  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmeq %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmeq %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vceqd_f64 (float64_t a, float64_t b)
{
  float64_t result;
  __asm__ ("fcmeq %d0,%d1,%d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmeq %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmeq %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vceqs_f32 (float32_t a, float32_t b)
{
  float32_t result;
  __asm__ ("fcmeq %s0,%s1,%s2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vceqzd_f64 (float64_t a)
{
  float64_t result;
  __asm__ ("fcmeq %d0,%d1,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vceqzs_f32 (float32_t a)
{
  float32_t result;
  __asm__ ("fcmeq %s0,%s1,#0"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* FP >= and > compares via FCMGE/FCMGT, in source order (a OP b), each
   producing an all-ones/all-zeros unsigned mask per lane.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmge %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcge_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmge %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmge %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgeq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmge %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmgt %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcgt_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmgt %d0, %d1, %d2"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmgt %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcgtq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmgt %0.2d, %1.2d, %2.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* FP <= compares: no FCMLE register form exists, so a <= b is computed as
   FCMGE b, a — note the swapped %2/%1 in each asm template.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmge %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcle_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmge %d0, %d2, %d1"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmge %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcleq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmge %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Count leading sign bits per lane: a single CLS instruction.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcls_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cls %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcls_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("cls %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcls_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("cls %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclsq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cls %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclsq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("cls %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclsq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("cls %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* FP < compares: no FCMLT register form exists, so a < b is computed as
   FCMGT b, a — note the swapped %2/%1 in each asm template.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_f32 (float32x2_t a, float32x2_t b)
{
  uint32x2_t result;
  __asm__ ("fcmgt %0.2s, %2.2s, %1.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vclt_f64 (float64x1_t a, float64x1_t b)
{
  uint64x1_t result;
  __asm__ ("fcmgt %d0, %d2, %d1"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_f32 (float32x4_t a, float32x4_t b)
{
  uint32x4_t result;
  __asm__ ("fcmgt %0.4s, %2.4s, %1.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcltq_f64 (float64x2_t a, float64x2_t b)
{
  uint64x2_t result;
  __asm__ ("fcmgt %0.2d, %2.2d, %1.2d"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Count leading zeros per lane: a single CLZ instruction.  The signed and
   unsigned wrappers emit the identical instruction; only the C types
   differ.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vclz_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("clz %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vclz_s16 (int16x4_t a)
{
  int16x4_t result;
  __asm__ ("clz %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vclz_s32 (int32x2_t a)
{
  int32x2_t result;
  __asm__ ("clz %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclz_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("clz %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclz_u16 (uint16x4_t a)
{
  uint16x4_t result;
  __asm__ ("clz %0.4h,%1.4h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclz_u32 (uint32x2_t a)
{
  uint32x2_t result;
  __asm__ ("clz %0.2s,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vclzq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("clz %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vclzq_s16 (int16x8_t a)
{
  int16x8_t result;
  __asm__ ("clz %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vclzq_s32 (int32x4_t a)
{
  int32x4_t result;
  __asm__ ("clz %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vclzq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("clz %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vclzq_u16 (uint16x8_t a)
{
  uint16x8_t result;
  __asm__ ("clz %0.8h,%1.8h"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vclzq_u32 (uint32x4_t a)
{
  uint32x4_t result;
  __asm__ ("clz %0.4s,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Population count per byte lane: a single CNT instruction.  CNT only
   operates on byte elements, hence only 8-bit variants exist; the poly,
   signed and unsigned wrappers emit the same instruction.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcnt_p8 (poly8x8_t a)
{
  poly8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcnt_s8 (int8x8_t a)
{
  int8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcnt_u8 (uint8x8_t a)
{
  uint8x8_t result;
  __asm__ ("cnt %0.8b,%1.8b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vcntq_p8 (poly8x16_t a)
{
  poly8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vcntq_s8 (int8x16_t a)
{
  int8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcntq_u8 (uint8x16_t a)
{
  uint8x16_t result;
  __asm__ ("cnt %0.16b,%1.16b"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Lane-copy macros: INS Vd.T[b], Vn.T[d] inserts element D of vector C
   into element B of vector A.  These must be macros (not inline functions)
   because the lane indices B and D feed "i" immediate constraints and so
   must be compile-time constants.  The statement-expression copies A into
   the output register via the "0" tie, then overwrites one lane.  */

#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t c_ = (c);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t c_ = (c);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t c_ = (c);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t c_ = (c);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t c_ = (c);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(result)                                          \
                : "0"(a_), "i"(b), "w"(c_), "i"(d)                      \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5829 #define vcopyq_lane_u8(a, b, c, d) \
5830 __extension__ \
5831 ({ \
5832 uint8x16_t c_ = (c); \
5833 uint8x16_t a_ = (a); \
5834 uint8x16_t result; \
5835 __asm__ ("ins %0.b[%2], %3.b[%4]" \
5836 : "=w"(result) \
5837 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5838 : /* No clobbers */); \
5839 result; \
5842 #define vcopyq_lane_u16(a, b, c, d) \
5843 __extension__ \
5844 ({ \
5845 uint16x8_t c_ = (c); \
5846 uint16x8_t a_ = (a); \
5847 uint16x8_t result; \
5848 __asm__ ("ins %0.h[%2], %3.h[%4]" \
5849 : "=w"(result) \
5850 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5851 : /* No clobbers */); \
5852 result; \
5855 #define vcopyq_lane_u32(a, b, c, d) \
5856 __extension__ \
5857 ({ \
5858 uint32x4_t c_ = (c); \
5859 uint32x4_t a_ = (a); \
5860 uint32x4_t result; \
5861 __asm__ ("ins %0.s[%2], %3.s[%4]" \
5862 : "=w"(result) \
5863 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5864 : /* No clobbers */); \
5865 result; \
5868 #define vcopyq_lane_u64(a, b, c, d) \
5869 __extension__ \
5870 ({ \
5871 uint64x2_t c_ = (c); \
5872 uint64x2_t a_ = (a); \
5873 uint64x2_t result; \
5874 __asm__ ("ins %0.d[%2], %3.d[%4]" \
5875 : "=w"(result) \
5876 : "0"(a_), "i"(b), "w"(c_), "i"(d) \
5877 : /* No clobbers */); \
5878 result; \
5881 /* vcvt_f16_f32 not supported */
5883 /* vcvt_f32_f16 not supported */
/* Narrow each double lane to float (FCVTN), producing a 2-lane f32 vector.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_f64 (float64x2_t a)
{
  float32x2_t result;
  __asm__ ("fcvtn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Lane-wise int32 -> float32 conversion: SCVTF (signed), UCVTF (unsigned).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_s32 (int32x2_t a)
{
  float32x2_t result;
  __asm__ ("scvtf %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvt_f32_u32 (uint32x2_t a)
{
  float32x2_t result;
  __asm__ ("ucvtf %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Widen each float lane to double (FCVTL), producing a 2-lane f64 vector.  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t a)
{
  float64x2_t result;
  __asm__ ("fcvtl %0.2d,%1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5929 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5930 vcvt_f64_s64 (uint64x1_t a)
5932 float64x1_t result;
5933 __asm__ ("scvtf %d0, %d1"
5934 : "=w"(result)
5935 : "w"(a)
5936 : /* No clobbers */);
5937 return result;
/* Convert an unsigned 64-bit integer to double (UCVTF).  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcvt_f64_u64 (uint64x1_t a)
{
  float64x1_t result;
  __asm__ ("ucvtf %d0, %d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
5951 /* vcvt_high_f16_f32 not supported */
5953 /* vcvt_high_f32_f16 not supported */
5955 static float32x2_t vdup_n_f32 (float32_t);
/* Narrow B's double lanes into the high half of the result (FCVTN2);
   the low half is taken from A via vcombine_f32.  The "+w" read/write
   constraint makes the pre-built combined value the insertion target.  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f64 (float32x2_t a, float64x2_t b)
{
  float32x4_t result = vcombine_f32 (a, vdup_n_f32 (0.0f));
  __asm__ ("fcvtn2 %0.4s,%2.2d"
           : "+w"(result)
           : "w"(b)
           : /* No clobbers */);
  return result;
}
/* Widen the high two float lanes of A to doubles (FCVTL2).  */
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t a)
{
  float64x2_t result;
  __asm__ ("fcvtl2 %0.2d,%1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Fixed-point <-> float conversions on 2-lane vectors; B is the number
   of fraction bits and must be an integer constant expression (hence
   macros, so "i" constraints can see the literal).  */

#define vcvt_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("scvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvt_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* float32 -> int32 lane-wise conversions: FCVTZS/FCVTZU truncate toward
   zero; FCVTAS/FCVTAU round to nearest, ties away from zero.  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtzs %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvt_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtzu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvta_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtas %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvta_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtau %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6071 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6072 vcvtad_s64_f64 (float64_t a)
6074 float64_t result;
6075 __asm__ ("fcvtas %d0,%d1"
6076 : "=w"(result)
6077 : "w"(a)
6078 : /* No clobbers */);
6079 return result;
6082 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6083 vcvtad_u64_f64 (float64_t a)
6085 float64_t result;
6086 __asm__ ("fcvtau %d0,%d1"
6087 : "=w"(result)
6088 : "w"(a)
6089 : /* No clobbers */);
6090 return result;
/* Quad-register float -> integer conversions, round to nearest, ties
   away from zero (FCVTAS/FCVTAU).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtaq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtas %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtaq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtas %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtaq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtau %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtaq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtau %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* NOTE(review): these operate on 32-bit (%s) registers and float32_t
   values, yet are named *_s64_f64 and return float32_t.  The names and
   types look inconsistent with the FCVTAS/FCVTAU scalar operation being
   issued (expected shape: vcvtas_s32_f32 returning int32_t) — confirm
   against the ACLE.  Left untouched to preserve the public names.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtas_s64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtas %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtas_u64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtau %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6159 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
6160 vcvtd_f64_s64 (int64_t a)
6162 int64_t result;
6163 __asm__ ("scvtf %d0,%d1"
6164 : "=w"(result)
6165 : "w"(a)
6166 : /* No clobbers */);
6167 return result;
6170 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
6171 vcvtd_f64_u64 (uint64_t a)
6173 uint64_t result;
6174 __asm__ ("ucvtf %d0,%d1"
6175 : "=w"(result)
6176 : "w"(a)
6177 : /* No clobbers */);
6178 return result;
/* Scalar 64-bit fixed-point conversions; B is the number of fraction
   bits (integer constant expression).
   Fix: each macro declared "result" with the *input* type, so the
   statement expression yielded the wrong type (bit-pattern reinterpreted).
   SCVTF/UCVTF now yield float64_t; FCVTZS/FCVTZU yield int64_t/uint64_t.  */

#define vcvtd_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64_t a_ = (a); \
       float64_t result; \
       __asm__ ("scvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64_t a_ = (a); \
       float64_t result; \
       __asm__ ("ucvtf %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       int64_t result; \
       __asm__ ("fcvtzs %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtd_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64_t a_ = (a); \
       uint64_t result; \
       __asm__ ("fcvtzu %d0,%d1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
6229 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6230 vcvtd_s64_f64 (float64_t a)
6232 float64_t result;
6233 __asm__ ("fcvtzs %d0,%d1"
6234 : "=w"(result)
6235 : "w"(a)
6236 : /* No clobbers */);
6237 return result;
6240 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6241 vcvtd_u64_f64 (float64_t a)
6243 float64_t result;
6244 __asm__ ("fcvtzu %d0,%d1"
6245 : "=w"(result)
6246 : "w"(a)
6247 : /* No clobbers */);
6248 return result;
/* float32 -> int32, rounding toward minus infinity (FCVTMS/FCVTMU).  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtm_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtms %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtm_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtmu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6273 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6274 vcvtmd_s64_f64 (float64_t a)
6276 float64_t result;
6277 __asm__ ("fcvtms %d0,%d1"
6278 : "=w"(result)
6279 : "w"(a)
6280 : /* No clobbers */);
6281 return result;
6284 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6285 vcvtmd_u64_f64 (float64_t a)
6287 float64_t result;
6288 __asm__ ("fcvtmu %d0,%d1"
6289 : "=w"(result)
6290 : "w"(a)
6291 : /* No clobbers */);
6292 return result;
/* Quad-register float -> integer, rounding toward minus infinity
   (FCVTMS/FCVTMU).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtmq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtms %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtmq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtms %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtmq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtmu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtmq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtmu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* NOTE(review): named *_s64_f64 but operate on %s registers and
   float32_t, and return float32_t — inconsistent with the scalar
   FCVTMS/FCVTMU operation issued (expected: vcvtms_s32_f32 returning
   int32_t).  Confirm against the ACLE; names preserved here.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtms_s64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtms %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtms_u64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtmu %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* float32 -> int32, rounding to nearest with ties to even
   (FCVTNS/FCVTNU).  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtn_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtns %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtn_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtnu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6383 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6384 vcvtnd_s64_f64 (float64_t a)
6386 float64_t result;
6387 __asm__ ("fcvtns %d0,%d1"
6388 : "=w"(result)
6389 : "w"(a)
6390 : /* No clobbers */);
6391 return result;
6394 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6395 vcvtnd_u64_f64 (float64_t a)
6397 float64_t result;
6398 __asm__ ("fcvtnu %d0,%d1"
6399 : "=w"(result)
6400 : "w"(a)
6401 : /* No clobbers */);
6402 return result;
/* Quad-register float -> integer, rounding to nearest, ties to even
   (FCVTNS/FCVTNU).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtnq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtns %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtnq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtns %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtnq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtnu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtnq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtnu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* NOTE(review): named *_s64_f64 but operate on %s registers and
   float32_t, and return float32_t — inconsistent with the scalar
   FCVTNS/FCVTNU operation issued (expected: vcvtns_s32_f32 returning
   int32_t).  Confirm against the ACLE; names preserved here.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtns_s64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtns %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtns_u64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtnu %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* float32 -> int32, rounding toward plus infinity (FCVTPS/FCVTPU).  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvtp_s32_f32 (float32x2_t a)
{
  int32x2_t result;
  __asm__ ("fcvtps %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcvtp_u32_f32 (float32x2_t a)
{
  uint32x2_t result;
  __asm__ ("fcvtpu %0.2s, %1.2s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
6493 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6494 vcvtpd_s64_f64 (float64_t a)
6496 float64_t result;
6497 __asm__ ("fcvtps %d0,%d1"
6498 : "=w"(result)
6499 : "w"(a)
6500 : /* No clobbers */);
6501 return result;
6504 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
6505 vcvtpd_u64_f64 (float64_t a)
6507 float64_t result;
6508 __asm__ ("fcvtpu %d0,%d1"
6509 : "=w"(result)
6510 : "w"(a)
6511 : /* No clobbers */);
6512 return result;
/* Quad-register float -> integer, rounding toward plus infinity
   (FCVTPS/FCVTPU).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtpq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtps %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtpq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtps %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtpq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtpu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtpq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtpu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* NOTE(review): named *_s64_f64 but operate on %s registers and
   float32_t, and return float32_t — inconsistent with the scalar
   FCVTPS/FCVTPU operation issued (expected: vcvtps_s32_f32 returning
   int32_t).  Confirm against the ACLE; names preserved here.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtps_s64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtps %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtps_u64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtpu %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Quad-register integer -> float conversions (SCVTF/UCVTF).  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_s32 (int32x4_t a)
{
  float32x4_t result;
  __asm__ ("scvtf %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtq_f32_u32 (uint32x4_t a)
{
  float32x4_t result;
  __asm__ ("ucvtf %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_s64 (int64x2_t a)
{
  float64x2_t result;
  __asm__ ("scvtf %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvtq_f64_u64 (uint64x2_t a)
{
  float64x2_t result;
  __asm__ ("ucvtf %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Quad-register fixed-point <-> float conversions; B is the number of
   fraction bits and must be an integer constant expression.  */

#define vcvtq_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("scvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("scvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_f64_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_s64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvtq_n_u64_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* Quad-register float -> integer, truncating toward zero
   (FCVTZS/FCVTZU).  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vcvtq_s32_f32 (float32x4_t a)
{
  int32x4_t result;
  __asm__ ("fcvtzs %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vcvtq_s64_f64 (float64x2_t a)
{
  int64x2_t result;
  __asm__ ("fcvtzs %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcvtq_u32_f32 (float32x4_t a)
{
  uint32x4_t result;
  __asm__ ("fcvtzu %0.4s, %1.4s"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcvtq_u64_f64 (float64x2_t a)
{
  uint64x2_t result;
  __asm__ ("fcvtzu %0.2d, %1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* NOTE(review): SCVTF/UCVTF on %s registers converts 32-bit integers to
   *float32*, yet these are named *_f64_* and declare the result with the
   input integer type — the returned value is the float's bit-pattern as
   an integer.  Expected shape per ACLE: vcvts_f32_s32 returning
   float32_t.  Confirm and fix names/types together; preserved here.  */

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vcvts_f64_s32 (int32_t a)
{
  int32_t result;
  __asm__ ("scvtf %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcvts_f64_u32 (uint32_t a)
{
  uint32_t result;
  __asm__ ("ucvtf %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Scalar 32-bit fixed-point conversions; B is the number of fraction
   bits (integer constant expression).
   Fix: each macro declared "result" with the *input* type, so the
   statement expression yielded the wrong type (bit-pattern reinterpreted).
   SCVTF/UCVTF now yield float32_t; FCVTZS/FCVTZU yield int32_t/uint32_t.  */

#define vcvts_n_f32_s32(a, b) \
  __extension__ \
    ({ \
       int32_t a_ = (a); \
       float32_t result; \
       __asm__ ("scvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_f32_u32(a, b) \
  __extension__ \
    ({ \
       uint32_t a_ = (a); \
       float32_t result; \
       __asm__ ("ucvtf %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_s32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       int32_t result; \
       __asm__ ("fcvtzs %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vcvts_n_u32_f32(a, b) \
  __extension__ \
    ({ \
       float32_t a_ = (a); \
       uint32_t result; \
       __asm__ ("fcvtzu %s0,%s1,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* NOTE(review): named *_s64_f64 but operate on %s registers and
   float32_t, and return float32_t for an integer-producing FCVTZS/FCVTZU
   operation (expected: vcvts_s32_f32 returning int32_t).  Confirm
   against the ACLE; names and types preserved here.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_s64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtzs %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvts_u64_f64 (float32_t a)
{
  float32_t result;
  __asm__ ("fcvtzu %s0,%s1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* Narrowing double -> float with round-to-odd (FCVTXN), which avoids
   double rounding when the result is rounded again later.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcvtx_f32_f64 (float64x2_t a)
{
  float32x2_t result;
  __asm__ ("fcvtxn %0.2s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

/* NOTE(review): FCVTXN2 writes only the high half of the destination,
   but "result" is never initialized here and no low-half operand is
   taken — the low two lanes of the return value appear indeterminate.
   Confirm against ACLE (vcvtx_high_f32_f64 normally also takes the
   low-half vector).  */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvtx_high_f32_f64 (float64x2_t a)
{
  float32x4_t result;
  __asm__ ("fcvtxn2 %0.4s,%1.2d"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vcvtxd_f32_f64 (float64_t a)
{
  float32_t result;
  __asm__ ("fcvtxn %s0,%d1"
           : "=w"(result)
           : "w"(a)
           : /* No clobbers */);
  return result;
}
/* vdup_lane_<t> (a, b): broadcast lane B of vector A to every lane of a
   64-bit result (DUP; INS for the single-lane 64-bit types).  B must be
   an integer constant expression.  */

#define vdup_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_p8(a, b) \
  __extension__ \
    ({ \
       poly8x8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_p16(a, b) \
  __extension__ \
    ({ \
       poly16x4_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s8(a, b) \
  __extension__ \
    ({ \
       int8x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s16(a, b) \
  __extension__ \
    ({ \
       int16x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_s64(a, b) \
  __extension__ \
    ({ \
       int64x1_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ins %0.d[0],%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u8(a, b) \
  __extension__ \
    ({ \
       uint8x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("dup %0.8b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u16(a, b) \
  __extension__ \
    ({ \
       uint16x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("dup %0.4h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("dup %0.2s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdup_lane_u64(a, b) \
  __extension__ \
    ({ \
       uint64x1_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ins %0.d[0],%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vdup_n_<t>: broadcast a scalar to every lane of a 64-bit vector
   (DUP from a general register; INS for the single-lane 64-bit types).  */

/* NOTE(review): "a" is a float passed through the general-register
   constraint "r" and broadcast with "dup ..., %w1"; whether the float's
   bit-pattern reliably arrives in the W register this way is
   constraint/ABI dependent — confirm (a "w"-register DUP lane form may
   be intended).  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vdup_n_f32 (float32_t a)
{
  float32x2_t result;
  __asm__ ("dup %0.2s, %w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vdup_n_p8 (uint32_t a)
{
  poly8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vdup_n_p16 (uint32_t a)
{
  poly16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vdup_n_s8 (int32_t a)
{
  int8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vdup_n_s16 (int32_t a)
{
  int16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vdup_n_s32 (int32_t a)
{
  int32x2_t result;
  __asm__ ("dup %0.2s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdup_n_s64 (int64_t a)
{
  int64x1_t result;
  __asm__ ("ins %0.d[0],%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vdup_n_u8 (uint32_t a)
{
  uint8x8_t result;
  __asm__ ("dup %0.8b,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vdup_n_u16 (uint32_t a)
{
  uint16x4_t result;
  __asm__ ("dup %0.4h,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vdup_n_u32 (uint32_t a)
{
  uint32x2_t result;
  __asm__ ("dup %0.2s,%w1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdup_n_u64 (uint64_t a)
{
  uint64x1_t result;
  __asm__ ("ins %0.d[0],%x1"
           : "=w"(result)
           : "r"(a)
           : /* No clobbers */);
  return result;
}
/* Extract lane B of a 2-lane double vector into a scalar (DUP to a
   D register).  B must be an integer constant expression.  */
#define vdupd_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t a_ = (a); \
       float64_t result; \
       __asm__ ("dup %d0, %1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
/* vdupq_lane_<t> (a, b): broadcast lane B of a 64-bit vector A to every
   lane of a 128-bit result (DUP).  B must be an integer constant
   expression.  */

#define vdupq_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x1_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_p8(a, b) \
  __extension__ \
    ({ \
       poly8x8_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_p16(a, b) \
  __extension__ \
    ({ \
       poly16x4_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s8(a, b) \
  __extension__ \
    ({ \
       int8x8_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s16(a, b) \
  __extension__ \
    ({ \
       int16x4_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_s64(a, b) \
  __extension__ \
    ({ \
       int64x1_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_u8(a, b) \
  __extension__ \
    ({ \
       uint8x8_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vdupq_lane_u16(a, b) \
  __extension__ \
    ({ \
       uint16x4_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
7275 #define vdupq_lane_u32(a, b) \
7276 __extension__ \
7277 ({ \
7278 uint32x2_t a_ = (a); \
7279 uint32x4_t result; \
7280 __asm__ ("dup %0.4s,%1.s[%2]" \
7281 : "=w"(result) \
7282 : "w"(a_), "i"(b) \
7283 : /* No clobbers */); \
7284 result; \
7287 #define vdupq_lane_u64(a, b) \
7288 __extension__ \
7289 ({ \
7290 uint64x1_t a_ = (a); \
7291 uint64x2_t result; \
7292 __asm__ ("dup %0.2d,%1.d[%2]" \
7293 : "=w"(result) \
7294 : "w"(a_), "i"(b) \
7295 : /* No clobbers */); \
7296 result; \
7299 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7300 vdupq_n_f32 (float32_t a)
7302 float32x4_t result;
7303 __asm__ ("dup %0.4s, %w1"
7304 : "=w"(result)
7305 : "r"(a)
7306 : /* No clobbers */);
7307 return result;
7310 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7311 vdupq_n_f64 (float64_t a)
7313 float64x2_t result;
7314 __asm__ ("dup %0.2d, %x1"
7315 : "=w"(result)
7316 : "r"(a)
7317 : /* No clobbers */);
7318 return result;
7321 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
7322 vdupq_n_p8 (uint32_t a)
7324 poly8x16_t result;
7325 __asm__ ("dup %0.16b,%w1"
7326 : "=w"(result)
7327 : "r"(a)
7328 : /* No clobbers */);
7329 return result;
7332 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7333 vdupq_n_p16 (uint32_t a)
7335 poly16x8_t result;
7336 __asm__ ("dup %0.8h,%w1"
7337 : "=w"(result)
7338 : "r"(a)
7339 : /* No clobbers */);
7340 return result;
7343 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7344 vdupq_n_s8 (int32_t a)
7346 int8x16_t result;
7347 __asm__ ("dup %0.16b,%w1"
7348 : "=w"(result)
7349 : "r"(a)
7350 : /* No clobbers */);
7351 return result;
7354 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7355 vdupq_n_s16 (int32_t a)
7357 int16x8_t result;
7358 __asm__ ("dup %0.8h,%w1"
7359 : "=w"(result)
7360 : "r"(a)
7361 : /* No clobbers */);
7362 return result;
7365 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7366 vdupq_n_s32 (int32_t a)
7368 int32x4_t result;
7369 __asm__ ("dup %0.4s,%w1"
7370 : "=w"(result)
7371 : "r"(a)
7372 : /* No clobbers */);
7373 return result;
7376 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7377 vdupq_n_s64 (int64_t a)
7379 int64x2_t result;
7380 __asm__ ("dup %0.2d,%x1"
7381 : "=w"(result)
7382 : "r"(a)
7383 : /* No clobbers */);
7384 return result;
7387 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7388 vdupq_n_u8 (uint32_t a)
7390 uint8x16_t result;
7391 __asm__ ("dup %0.16b,%w1"
7392 : "=w"(result)
7393 : "r"(a)
7394 : /* No clobbers */);
7395 return result;
7398 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7399 vdupq_n_u16 (uint32_t a)
7401 uint16x8_t result;
7402 __asm__ ("dup %0.8h,%w1"
7403 : "=w"(result)
7404 : "r"(a)
7405 : /* No clobbers */);
7406 return result;
7409 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7410 vdupq_n_u32 (uint32_t a)
7412 uint32x4_t result;
7413 __asm__ ("dup %0.4s,%w1"
7414 : "=w"(result)
7415 : "r"(a)
7416 : /* No clobbers */);
7417 return result;
7420 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7421 vdupq_n_u64 (uint64_t a)
7423 uint64x2_t result;
7424 __asm__ ("dup %0.2d,%x1"
7425 : "=w"(result)
7426 : "r"(a)
7427 : /* No clobbers */);
7428 return result;
/* vdups_lane_f32: extract lane B of a float32x4_t as a scalar float.
   Macro so B can satisfy the "i" (immediate) constraint.  */
#define vdups_lane_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t a_ = (a);                                            \
       float32_t result;                                                \
       __asm__ ("dup %s0, %1.s[%2]"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vext_<type>: extract a 64-bit vector spanning the boundary between
   A and B, starting at element C of A.  The byte immediate of EXT is
   C scaled by the element size (#%3*<size>).  Macros so C can feed
   the "i" constraint; B is evaluated before A, matching the original
   argument-capture order.  */
#define vext_f32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_f64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64x1_t a_ = (a);                                            \
       float64x1_t result;                                              \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_p8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_p16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8x8_t a_ = (a);                                               \
       int8x8_t result;                                                 \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_s64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64x1_t a_ = (a);                                              \
       int64x1_t result;                                                \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u8(a, b, c)                                                \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x8_t result;                                                \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u16(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u32(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vext_u64(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64x1_t a_ = (a);                                             \
       uint64x1_t result;                                               \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8"                        \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vextq_<type>: 128-bit variant of vext — extract a vector spanning
   the boundary between A and B, starting at element C of A; EXT's
   byte immediate is C scaled by the element size.  */
#define vextq_f32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_f64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_p8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_p16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8x16_t a_ = (a);                                              \
       int8x16_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_s64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u8(a, b, c)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8x16_t a_ = (a);                                             \
       uint8x16_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3"                       \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u16(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u32(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vextq_u64(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8"                     \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7755 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7756 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
7758 float32x2_t result;
7759 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
7760 : "=w"(result)
7761 : "0"(a), "w"(b), "w"(c)
7762 : /* No clobbers */);
7763 return result;
7766 #define vfma_lane_f32(a, b, c, d) \
7767 __extension__ \
7768 ({ \
7769 float32x2_t c_ = (c); \
7770 float32x2_t b_ = (b); \
7771 float32x2_t a_ = (a); \
7772 float32x2_t result; \
7773 __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \
7774 : "=w"(result) \
7775 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7776 : /* No clobbers */); \
7777 result; \
/* vfmad_lane_f64: scalar FMLA against lane C of B.
   NOTE(review): FMLA reads %d0 (result) as its accumulator, but
   `result` is never tied to or initialized from a_ — a_ is only the
   multiplicand %d1.  As written the accumulate input is an
   uninitialized register; confirm against the intended ACLE
   semantics before relying on this.  */
#define vfmad_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmla %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7793 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7794 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
7796 float32x4_t result;
7797 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
7798 : "=w"(result)
7799 : "0"(a), "w"(b), "w"(c)
7800 : /* No clobbers */);
7801 return result;
7804 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7805 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
7807 float64x2_t result;
7808 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
7809 : "=w"(result)
7810 : "0"(a), "w"(b), "w"(c)
7811 : /* No clobbers */);
7812 return result;
7815 #define vfmaq_lane_f32(a, b, c, d) \
7816 __extension__ \
7817 ({ \
7818 float32x4_t c_ = (c); \
7819 float32x4_t b_ = (b); \
7820 float32x4_t a_ = (a); \
7821 float32x4_t result; \
7822 __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \
7823 : "=w"(result) \
7824 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7825 : /* No clobbers */); \
7826 result; \
7829 #define vfmaq_lane_f64(a, b, c, d) \
7830 __extension__ \
7831 ({ \
7832 float64x2_t c_ = (c); \
7833 float64x2_t b_ = (b); \
7834 float64x2_t a_ = (a); \
7835 float64x2_t result; \
7836 __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \
7837 : "=w"(result) \
7838 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
7839 : /* No clobbers */); \
7840 result; \
/* vfmas_lane_f32: scalar FMLA against lane C of B.
   NOTE(review): as with vfmad_lane_f64, FMLA's accumulator is %s0
   (result), which is neither tied to nor initialized from a_; the
   accumulate input is an uninitialized register.  Confirm intended
   semantics.  */
#define vfmas_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmla %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7856 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7857 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7859 float32x2_t result;
7860 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
7861 : "=w"(result)
7862 : "0"(a), "w"(b), "w"(c)
7863 : /* No clobbers */);
7864 return result;
7867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7868 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7870 float32x4_t result;
7871 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
7872 : "=w"(result)
7873 : "0"(a), "w"(b), "w"(c)
7874 : /* No clobbers */);
7875 return result;
7878 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7879 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
7881 float64x2_t result;
7882 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
7883 : "=w"(result)
7884 : "0"(a), "w"(b), "w"(c)
7885 : /* No clobbers */);
7886 return result;
7889 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7890 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
7892 float32x2_t result;
7893 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
7894 : "=w"(result)
7895 : "0"(a), "w"(b), "w"(c)
7896 : /* No clobbers */);
7897 return result;
/* vfmsd_lane_f64: scalar FMLS against lane C of B.
   NOTE(review): FMLS's accumulator is %d0 (result), which is neither
   tied to nor initialized from a_ — same uninitialized-accumulator
   concern as vfmad_lane_f64.  Confirm intended semantics.  */
#define vfmsd_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmls %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7913 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7914 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
7916 float32x4_t result;
7917 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
7918 : "=w"(result)
7919 : "0"(a), "w"(b), "w"(c)
7920 : /* No clobbers */);
7921 return result;
7924 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7925 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
7927 float64x2_t result;
7928 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
7929 : "=w"(result)
7930 : "0"(a), "w"(b), "w"(c)
7931 : /* No clobbers */);
7932 return result;
/* vfmss_lane_f32: scalar FMLS against lane C of B.
   NOTE(review): same uninitialized-accumulator concern as the other
   scalar lane FMLA/FMLS macros — %s0 (result) is read as the
   accumulator but is never tied to a_.  Confirm intended semantics.  */
#define vfmss_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmls %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7948 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7949 vget_high_f32 (float32x4_t a)
7951 float32x2_t result;
7952 __asm__ ("ins %0.d[0], %1.d[1]"
7953 : "=w"(result)
7954 : "w"(a)
7955 : /* No clobbers */);
7956 return result;
7959 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
7960 vget_high_f64 (float64x2_t a)
7962 float64x1_t result;
7963 __asm__ ("ins %0.d[0], %1.d[1]"
7964 : "=w"(result)
7965 : "w"(a)
7966 : /* No clobbers */);
7967 return result;
7970 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7971 vget_high_p8 (poly8x16_t a)
7973 poly8x8_t result;
7974 __asm__ ("ins %0.d[0], %1.d[1]"
7975 : "=w"(result)
7976 : "w"(a)
7977 : /* No clobbers */);
7978 return result;
7981 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
7982 vget_high_p16 (poly16x8_t a)
7984 poly16x4_t result;
7985 __asm__ ("ins %0.d[0], %1.d[1]"
7986 : "=w"(result)
7987 : "w"(a)
7988 : /* No clobbers */);
7989 return result;
7992 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7993 vget_high_s8 (int8x16_t a)
7995 int8x8_t result;
7996 __asm__ ("ins %0.d[0], %1.d[1]"
7997 : "=w"(result)
7998 : "w"(a)
7999 : /* No clobbers */);
8000 return result;
8003 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8004 vget_high_s16 (int16x8_t a)
8006 int16x4_t result;
8007 __asm__ ("ins %0.d[0], %1.d[1]"
8008 : "=w"(result)
8009 : "w"(a)
8010 : /* No clobbers */);
8011 return result;
8014 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8015 vget_high_s32 (int32x4_t a)
8017 int32x2_t result;
8018 __asm__ ("ins %0.d[0], %1.d[1]"
8019 : "=w"(result)
8020 : "w"(a)
8021 : /* No clobbers */);
8022 return result;
8025 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8026 vget_high_s64 (int64x2_t a)
8028 int64x1_t result;
8029 __asm__ ("ins %0.d[0], %1.d[1]"
8030 : "=w"(result)
8031 : "w"(a)
8032 : /* No clobbers */);
8033 return result;
8036 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8037 vget_high_u8 (uint8x16_t a)
8039 uint8x8_t result;
8040 __asm__ ("ins %0.d[0], %1.d[1]"
8041 : "=w"(result)
8042 : "w"(a)
8043 : /* No clobbers */);
8044 return result;
8047 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8048 vget_high_u16 (uint16x8_t a)
8050 uint16x4_t result;
8051 __asm__ ("ins %0.d[0], %1.d[1]"
8052 : "=w"(result)
8053 : "w"(a)
8054 : /* No clobbers */);
8055 return result;
8058 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8059 vget_high_u32 (uint32x4_t a)
8061 uint32x2_t result;
8062 __asm__ ("ins %0.d[0], %1.d[1]"
8063 : "=w"(result)
8064 : "w"(a)
8065 : /* No clobbers */);
8066 return result;
8069 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8070 vget_high_u64 (uint64x2_t a)
8072 uint64x1_t result;
8073 __asm__ ("ins %0.d[0], %1.d[1]"
8074 : "=w"(result)
8075 : "w"(a)
8076 : /* No clobbers */);
8077 return result;
/* vget_lane_f64: move D-lane B of a_ into a general register via
   UMOV; the bit pattern is then returned as a float64_t.  Macro so B
   can satisfy the "i" (immediate) constraint.  */
#define vget_lane_f64(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t a_ = (a);                                            \
       float64_t result;                                                \
       __asm__ ("umov %x0, %1.d[%2]"                                    \
                : "=r"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8092 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8093 vget_low_f32 (float32x4_t a)
8095 float32x2_t result;
8096 __asm__ ("ins %0.d[0], %1.d[0]"
8097 : "=w"(result)
8098 : "w"(a)
8099 : /* No clobbers */);
8100 return result;
8103 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
8104 vget_low_f64 (float64x2_t a)
8106 float64x1_t result;
8107 __asm__ ("ins %0.d[0], %1.d[0]"
8108 : "=w"(result)
8109 : "w"(a)
8110 : /* No clobbers */);
8111 return result;
8114 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8115 vget_low_p8 (poly8x16_t a)
8117 poly8x8_t result;
8118 __asm__ ("ins %0.d[0], %1.d[0]"
8119 : "=w"(result)
8120 : "w"(a)
8121 : /* No clobbers */);
8122 return result;
8125 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
8126 vget_low_p16 (poly16x8_t a)
8128 poly16x4_t result;
8129 __asm__ ("ins %0.d[0], %1.d[0]"
8130 : "=w"(result)
8131 : "w"(a)
8132 : /* No clobbers */);
8133 return result;
8136 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8137 vget_low_s8 (int8x16_t a)
8139 int8x8_t result;
8140 __asm__ ("ins %0.d[0], %1.d[0]"
8141 : "=w"(result)
8142 : "w"(a)
8143 : /* No clobbers */);
8144 return result;
8147 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8148 vget_low_s16 (int16x8_t a)
8150 int16x4_t result;
8151 __asm__ ("ins %0.d[0], %1.d[0]"
8152 : "=w"(result)
8153 : "w"(a)
8154 : /* No clobbers */);
8155 return result;
8158 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8159 vget_low_s32 (int32x4_t a)
8161 int32x2_t result;
8162 __asm__ ("ins %0.d[0], %1.d[0]"
8163 : "=w"(result)
8164 : "w"(a)
8165 : /* No clobbers */);
8166 return result;
8169 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8170 vget_low_s64 (int64x2_t a)
8172 int64x1_t result;
8173 __asm__ ("ins %0.d[0], %1.d[0]"
8174 : "=w"(result)
8175 : "w"(a)
8176 : /* No clobbers */);
8177 return result;
8180 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8181 vget_low_u8 (uint8x16_t a)
8183 uint8x8_t result;
8184 __asm__ ("ins %0.d[0], %1.d[0]"
8185 : "=w"(result)
8186 : "w"(a)
8187 : /* No clobbers */);
8188 return result;
8191 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8192 vget_low_u16 (uint16x8_t a)
8194 uint16x4_t result;
8195 __asm__ ("ins %0.d[0], %1.d[0]"
8196 : "=w"(result)
8197 : "w"(a)
8198 : /* No clobbers */);
8199 return result;
8202 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8203 vget_low_u32 (uint32x4_t a)
8205 uint32x2_t result;
8206 __asm__ ("ins %0.d[0], %1.d[0]"
8207 : "=w"(result)
8208 : "w"(a)
8209 : /* No clobbers */);
8210 return result;
8213 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8214 vget_low_u64 (uint64x2_t a)
8216 uint64x1_t result;
8217 __asm__ ("ins %0.d[0], %1.d[0]"
8218 : "=w"(result)
8219 : "w"(a)
8220 : /* No clobbers */);
8221 return result;
8224 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8225 vhsub_s8 (int8x8_t a, int8x8_t b)
8227 int8x8_t result;
8228 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
8229 : "=w"(result)
8230 : "w"(a), "w"(b)
8231 : /* No clobbers */);
8232 return result;
8235 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8236 vhsub_s16 (int16x4_t a, int16x4_t b)
8238 int16x4_t result;
8239 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
8240 : "=w"(result)
8241 : "w"(a), "w"(b)
8242 : /* No clobbers */);
8243 return result;
8246 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8247 vhsub_s32 (int32x2_t a, int32x2_t b)
8249 int32x2_t result;
8250 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
8251 : "=w"(result)
8252 : "w"(a), "w"(b)
8253 : /* No clobbers */);
8254 return result;
8257 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8258 vhsub_u8 (uint8x8_t a, uint8x8_t b)
8260 uint8x8_t result;
8261 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
8262 : "=w"(result)
8263 : "w"(a), "w"(b)
8264 : /* No clobbers */);
8265 return result;
8268 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8269 vhsub_u16 (uint16x4_t a, uint16x4_t b)
8271 uint16x4_t result;
8272 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
8273 : "=w"(result)
8274 : "w"(a), "w"(b)
8275 : /* No clobbers */);
8276 return result;
8279 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8280 vhsub_u32 (uint32x2_t a, uint32x2_t b)
8282 uint32x2_t result;
8283 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
8284 : "=w"(result)
8285 : "w"(a), "w"(b)
8286 : /* No clobbers */);
8287 return result;
8290 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8291 vhsubq_s8 (int8x16_t a, int8x16_t b)
8293 int8x16_t result;
8294 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
8295 : "=w"(result)
8296 : "w"(a), "w"(b)
8297 : /* No clobbers */);
8298 return result;
8301 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8302 vhsubq_s16 (int16x8_t a, int16x8_t b)
8304 int16x8_t result;
8305 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
8306 : "=w"(result)
8307 : "w"(a), "w"(b)
8308 : /* No clobbers */);
8309 return result;
8312 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8313 vhsubq_s32 (int32x4_t a, int32x4_t b)
8315 int32x4_t result;
8316 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
8317 : "=w"(result)
8318 : "w"(a), "w"(b)
8319 : /* No clobbers */);
8320 return result;
8323 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8324 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
8326 uint8x16_t result;
8327 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
8328 : "=w"(result)
8329 : "w"(a), "w"(b)
8330 : /* No clobbers */);
8331 return result;
8334 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8335 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
8337 uint16x8_t result;
8338 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
8339 : "=w"(result)
8340 : "w"(a), "w"(b)
8341 : /* No clobbers */);
8342 return result;
8345 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8346 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
8348 uint32x4_t result;
8349 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
8350 : "=w"(result)
8351 : "w"(a), "w"(b)
8352 : /* No clobbers */);
8353 return result;
8356 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8357 vld1_dup_f32 (const float32_t * a)
8359 float32x2_t result;
8360 __asm__ ("ld1r {%0.2s}, %1"
8361 : "=w"(result)
8362 : "Utv"(*a)
8363 : /* No clobbers */);
8364 return result;
8367 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
8368 vld1_dup_f64 (const float64_t * a)
8370 float64x1_t result;
8371 __asm__ ("ld1r {%0.1d}, %1"
8372 : "=w"(result)
8373 : "Utv"(*a)
8374 : /* No clobbers */);
8375 return result;
8378 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8379 vld1_dup_p8 (const poly8_t * a)
8381 poly8x8_t result;
8382 __asm__ ("ld1r {%0.8b}, %1"
8383 : "=w"(result)
8384 : "Utv"(*a)
8385 : /* No clobbers */);
8386 return result;
8389 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
8390 vld1_dup_p16 (const poly16_t * a)
8392 poly16x4_t result;
8393 __asm__ ("ld1r {%0.4h}, %1"
8394 : "=w"(result)
8395 : "Utv"(*a)
8396 : /* No clobbers */);
8397 return result;
8400 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8401 vld1_dup_s8 (const int8_t * a)
8403 int8x8_t result;
8404 __asm__ ("ld1r {%0.8b}, %1"
8405 : "=w"(result)
8406 : "Utv"(*a)
8407 : /* No clobbers */);
8408 return result;
8411 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8412 vld1_dup_s16 (const int16_t * a)
8414 int16x4_t result;
8415 __asm__ ("ld1r {%0.4h}, %1"
8416 : "=w"(result)
8417 : "Utv"(*a)
8418 : /* No clobbers */);
8419 return result;
8422 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8423 vld1_dup_s32 (const int32_t * a)
8425 int32x2_t result;
8426 __asm__ ("ld1r {%0.2s}, %1"
8427 : "=w"(result)
8428 : "Utv"(*a)
8429 : /* No clobbers */);
8430 return result;
8433 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8434 vld1_dup_s64 (const int64_t * a)
8436 int64x1_t result;
8437 __asm__ ("ld1r {%0.1d}, %1"
8438 : "=w"(result)
8439 : "Utv"(*a)
8440 : /* No clobbers */);
8441 return result;
8444 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8445 vld1_dup_u8 (const uint8_t * a)
8447 uint8x8_t result;
8448 __asm__ ("ld1r {%0.8b}, %1"
8449 : "=w"(result)
8450 : "Utv"(*a)
8451 : /* No clobbers */);
8452 return result;
8455 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8456 vld1_dup_u16 (const uint16_t * a)
8458 uint16x4_t result;
8459 __asm__ ("ld1r {%0.4h}, %1"
8460 : "=w"(result)
8461 : "Utv"(*a)
8462 : /* No clobbers */);
8463 return result;
8466 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8467 vld1_dup_u32 (const uint32_t * a)
8469 uint32x2_t result;
8470 __asm__ ("ld1r {%0.2s}, %1"
8471 : "=w"(result)
8472 : "Utv"(*a)
8473 : /* No clobbers */);
8474 return result;
8477 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8478 vld1_dup_u64 (const uint64_t * a)
8480 uint64x1_t result;
8481 __asm__ ("ld1r {%0.1d}, %1"
8482 : "=w"(result)
8483 : "Utv"(*a)
8484 : /* No clobbers */);
8485 return result;
8488 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8489 vld1_f32 (const float32_t * a)
8491 float32x2_t result;
8492 __asm__ ("ld1 {%0.2s}, %1"
8493 : "=w"(result)
8494 : "Utv"(({const float32x2_t *_a = (float32x2_t *) a; *_a;}))
8495 : /* No clobbers */);
8496 return result;
8499 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
8500 vld1_f64 (const float64_t * a)
8502 float64x1_t result;
8503 __asm__ ("ld1 {%0.1d}, %1"
8504 : "=w"(result)
8505 : "Utv"(*a)
8506 : /* No clobbers */);
8507 return result;
/* vld1_lane_<type>: load one element from *A into lane C of vector
   B, leaving the other lanes unchanged (B is tied to the output with
   "0").  Macros so C can satisfy the "i" (immediate) constraint.  */
#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8627 #define vld1_lane_u16(a, b, c) \
8628 __extension__ \
8629 ({ \
8630 uint16x4_t b_ = (b); \
8631 const uint16_t * a_ = (a); \
8632 uint16x4_t result; \
8633 __asm__ ("ld1 {%0.h}[%1], %2" \
8634 : "=w"(result) \
8635 : "i" (c), "Utv"(*a_), "0"(b_) \
8636 : /* No clobbers */); \
8637 result; \
8640 #define vld1_lane_u32(a, b, c) \
8641 __extension__ \
8642 ({ \
8643 uint32x2_t b_ = (b); \
8644 const uint32_t * a_ = (a); \
8645 uint32x2_t result; \
8646 __asm__ ("ld1 {%0.s}[%1], %2" \
8647 : "=w"(result) \
8648 : "i" (c), "Utv"(*a_), "0"(b_) \
8649 : /* No clobbers */); \
8650 result; \
8653 #define vld1_lane_u64(a, b, c) \
8654 __extension__ \
8655 ({ \
8656 uint64x1_t b_ = (b); \
8657 const uint64_t * a_ = (a); \
8658 uint64x1_t result; \
8659 __asm__ ("ld1 {%0.d}[%1], %2" \
8660 : "=w"(result) \
8661 : "i" (c), "Utv"(*a_), "0"(b_) \
8662 : /* No clobbers */); \
8663 result; \
/* vld1_<type>: load a full 64-bit vector from memory.  Multi-element
   types go through a statement-expression that re-types the scalar
   pointer as a vector pointer so "Utv" sees a whole-vector memory
   operand; the one-element s64/u64 forms dereference directly.  */
8666 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8667 vld1_p8 (const poly8_t * a)
8669 poly8x8_t result;
8670 __asm__ ("ld1 {%0.8b}, %1"
8671 : "=w"(result)
8672 : "Utv"(({const poly8x8_t *_a = (poly8x8_t *) a; *_a;}))
8673 : /* No clobbers */);
8674 return result;
8677 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
8678 vld1_p16 (const poly16_t * a)
8680 poly16x4_t result;
8681 __asm__ ("ld1 {%0.4h}, %1"
8682 : "=w"(result)
8683 : "Utv"(({const poly16x4_t *_a = (poly16x4_t *) a; *_a;}))
8684 : /* No clobbers */);
8685 return result;
8688 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8689 vld1_s8 (const int8_t * a)
8691 int8x8_t result;
8692 __asm__ ("ld1 {%0.8b}, %1"
8693 : "=w"(result)
8694 : "Utv"(({const int8x8_t *_a = (int8x8_t *) a; *_a;}))
8695 : /* No clobbers */);
8696 return result;
8699 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8700 vld1_s16 (const int16_t * a)
8702 int16x4_t result;
8703 __asm__ ("ld1 {%0.4h}, %1"
8704 : "=w"(result)
8705 : "Utv"(({const int16x4_t *_a = (int16x4_t *) a; *_a;}))
8706 : /* No clobbers */);
8707 return result;
8710 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8711 vld1_s32 (const int32_t * a)
8713 int32x2_t result;
8714 __asm__ ("ld1 {%0.2s}, %1"
8715 : "=w"(result)
8716 : "Utv"(({const int32x2_t *_a = (int32x2_t *) a; *_a;}))
8717 : /* No clobbers */);
8718 return result;
/* Single 64-bit lane: int64x1_t is a scalar typedef, so *a suffices.  */
8721 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
8722 vld1_s64 (const int64_t * a)
8724 int64x1_t result;
8725 __asm__ ("ld1 {%0.1d}, %1"
8726 : "=w"(result)
8727 : "Utv"(*a)
8728 : /* No clobbers */);
8729 return result;
8732 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8733 vld1_u8 (const uint8_t * a)
8735 uint8x8_t result;
8736 __asm__ ("ld1 {%0.8b}, %1"
8737 : "=w"(result)
8738 : "Utv"(({const uint8x8_t *_a = (uint8x8_t *) a; *_a;}))
8739 : /* No clobbers */);
8740 return result;
8743 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8744 vld1_u16 (const uint16_t * a)
8746 uint16x4_t result;
8747 __asm__ ("ld1 {%0.4h}, %1"
8748 : "=w"(result)
8749 : "Utv"(({const uint16x4_t *_a = (uint16x4_t *) a; *_a;}))
8750 : /* No clobbers */);
8751 return result;
8754 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8755 vld1_u32 (const uint32_t * a)
8757 uint32x2_t result;
8758 __asm__ ("ld1 {%0.2s}, %1"
8759 : "=w"(result)
8760 : "Utv"(({const uint32x2_t *_a = (uint32x2_t *) a; *_a;}))
8761 : /* No clobbers */);
8762 return result;
8765 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
8766 vld1_u64 (const uint64_t * a)
8768 uint64x1_t result;
8769 __asm__ ("ld1 {%0.1d}, %1"
8770 : "=w"(result)
8771 : "Utv"(*a)
8772 : /* No clobbers */);
8773 return result;
/* vld1q_dup_<type>: LD1R into a full 128-bit Q-register — the single
   scalar at *a is replicated into every lane of the result.  */
8776 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8777 vld1q_dup_f32 (const float32_t * a)
8779 float32x4_t result;
8780 __asm__ ("ld1r {%0.4s}, %1"
8781 : "=w"(result)
8782 : "Utv"(*a)
8783 : /* No clobbers */);
8784 return result;
8787 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8788 vld1q_dup_f64 (const float64_t * a)
8790 float64x2_t result;
8791 __asm__ ("ld1r {%0.2d}, %1"
8792 : "=w"(result)
8793 : "Utv"(*a)
8794 : /* No clobbers */);
8795 return result;
8798 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
8799 vld1q_dup_p8 (const poly8_t * a)
8801 poly8x16_t result;
8802 __asm__ ("ld1r {%0.16b}, %1"
8803 : "=w"(result)
8804 : "Utv"(*a)
8805 : /* No clobbers */);
8806 return result;
8809 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8810 vld1q_dup_p16 (const poly16_t * a)
8812 poly16x8_t result;
8813 __asm__ ("ld1r {%0.8h}, %1"
8814 : "=w"(result)
8815 : "Utv"(*a)
8816 : /* No clobbers */);
8817 return result;
8820 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8821 vld1q_dup_s8 (const int8_t * a)
8823 int8x16_t result;
8824 __asm__ ("ld1r {%0.16b}, %1"
8825 : "=w"(result)
8826 : "Utv"(*a)
8827 : /* No clobbers */);
8828 return result;
8831 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8832 vld1q_dup_s16 (const int16_t * a)
8834 int16x8_t result;
8835 __asm__ ("ld1r {%0.8h}, %1"
8836 : "=w"(result)
8837 : "Utv"(*a)
8838 : /* No clobbers */);
8839 return result;
8842 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8843 vld1q_dup_s32 (const int32_t * a)
8845 int32x4_t result;
8846 __asm__ ("ld1r {%0.4s}, %1"
8847 : "=w"(result)
8848 : "Utv"(*a)
8849 : /* No clobbers */);
8850 return result;
8853 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8854 vld1q_dup_s64 (const int64_t * a)
8856 int64x2_t result;
8857 __asm__ ("ld1r {%0.2d}, %1"
8858 : "=w"(result)
8859 : "Utv"(*a)
8860 : /* No clobbers */);
8861 return result;
8864 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8865 vld1q_dup_u8 (const uint8_t * a)
8867 uint8x16_t result;
8868 __asm__ ("ld1r {%0.16b}, %1"
8869 : "=w"(result)
8870 : "Utv"(*a)
8871 : /* No clobbers */);
8872 return result;
8875 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8876 vld1q_dup_u16 (const uint16_t * a)
8878 uint16x8_t result;
8879 __asm__ ("ld1r {%0.8h}, %1"
8880 : "=w"(result)
8881 : "Utv"(*a)
8882 : /* No clobbers */);
8883 return result;
8886 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8887 vld1q_dup_u32 (const uint32_t * a)
8889 uint32x4_t result;
8890 __asm__ ("ld1r {%0.4s}, %1"
8891 : "=w"(result)
8892 : "Utv"(*a)
8893 : /* No clobbers */);
8894 return result;
8897 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8898 vld1q_dup_u64 (const uint64_t * a)
8900 uint64x2_t result;
8901 __asm__ ("ld1r {%0.2d}, %1"
8902 : "=w"(result)
8903 : "Utv"(*a)
8904 : /* No clobbers */);
8905 return result;
/* vld1q_f<32,64>: load a full 128-bit float vector; the statement
   expression re-types the scalar pointer so "Utv" gets a whole-vector
   memory operand.  */
8908 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8909 vld1q_f32 (const float32_t * a)
8911 float32x4_t result;
8912 __asm__ ("ld1 {%0.4s}, %1"
8913 : "=w"(result)
8914 : "Utv"(({const float32x4_t *_a = (float32x4_t *) a; *_a;}))
8915 : /* No clobbers */);
8916 return result;
8919 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8920 vld1q_f64 (const float64_t * a)
8922 float64x2_t result;
8923 __asm__ ("ld1 {%0.2d}, %1"
8924 : "=w"(result)
8925 : "Utv"(({const float64x2_t *_a = (float64x2_t *) a; *_a;}))
8926 : /* No clobbers */);
8927 return result;
/* vld1q_lane_<type>(a, b, c): 128-bit counterparts of vld1_lane — load
   one element from *a into lane c of the Q-register vector b, keeping
   all other lanes via the tied "0"(b_) operand.  Macros because lane
   index c must be an immediate ("i") operand.  */
8930 #define vld1q_lane_f32(a, b, c) \
8931 __extension__ \
8932 ({ \
8933 float32x4_t b_ = (b); \
8934 const float32_t * a_ = (a); \
8935 float32x4_t result; \
8936 __asm__ ("ld1 {%0.s}[%1], %2" \
8937 : "=w"(result) \
8938 : "i"(c), "Utv"(*a_), "0"(b_) \
8939 : /* No clobbers */); \
8940 result; \
8943 #define vld1q_lane_f64(a, b, c) \
8944 __extension__ \
8945 ({ \
8946 float64x2_t b_ = (b); \
8947 const float64_t * a_ = (a); \
8948 float64x2_t result; \
8949 __asm__ ("ld1 {%0.d}[%1], %2" \
8950 : "=w"(result) \
8951 : "i"(c), "Utv"(*a_), "0"(b_) \
8952 : /* No clobbers */); \
8953 result; \
8956 #define vld1q_lane_p8(a, b, c) \
8957 __extension__ \
8958 ({ \
8959 poly8x16_t b_ = (b); \
8960 const poly8_t * a_ = (a); \
8961 poly8x16_t result; \
8962 __asm__ ("ld1 {%0.b}[%1], %2" \
8963 : "=w"(result) \
8964 : "i"(c), "Utv"(*a_), "0"(b_) \
8965 : /* No clobbers */); \
8966 result; \
8969 #define vld1q_lane_p16(a, b, c) \
8970 __extension__ \
8971 ({ \
8972 poly16x8_t b_ = (b); \
8973 const poly16_t * a_ = (a); \
8974 poly16x8_t result; \
8975 __asm__ ("ld1 {%0.h}[%1], %2" \
8976 : "=w"(result) \
8977 : "i"(c), "Utv"(*a_), "0"(b_) \
8978 : /* No clobbers */); \
8979 result; \
8982 #define vld1q_lane_s8(a, b, c) \
8983 __extension__ \
8984 ({ \
8985 int8x16_t b_ = (b); \
8986 const int8_t * a_ = (a); \
8987 int8x16_t result; \
8988 __asm__ ("ld1 {%0.b}[%1], %2" \
8989 : "=w"(result) \
8990 : "i"(c), "Utv"(*a_), "0"(b_) \
8991 : /* No clobbers */); \
8992 result; \
8995 #define vld1q_lane_s16(a, b, c) \
8996 __extension__ \
8997 ({ \
8998 int16x8_t b_ = (b); \
8999 const int16_t * a_ = (a); \
9000 int16x8_t result; \
9001 __asm__ ("ld1 {%0.h}[%1], %2" \
9002 : "=w"(result) \
9003 : "i"(c), "Utv"(*a_), "0"(b_) \
9004 : /* No clobbers */); \
9005 result; \
9008 #define vld1q_lane_s32(a, b, c) \
9009 __extension__ \
9010 ({ \
9011 int32x4_t b_ = (b); \
9012 const int32_t * a_ = (a); \
9013 int32x4_t result; \
9014 __asm__ ("ld1 {%0.s}[%1], %2" \
9015 : "=w"(result) \
9016 : "i"(c), "Utv"(*a_), "0"(b_) \
9017 : /* No clobbers */); \
9018 result; \
9021 #define vld1q_lane_s64(a, b, c) \
9022 __extension__ \
9023 ({ \
9024 int64x2_t b_ = (b); \
9025 const int64_t * a_ = (a); \
9026 int64x2_t result; \
9027 __asm__ ("ld1 {%0.d}[%1], %2" \
9028 : "=w"(result) \
9029 : "i"(c), "Utv"(*a_), "0"(b_) \
9030 : /* No clobbers */); \
9031 result; \
9034 #define vld1q_lane_u8(a, b, c) \
9035 __extension__ \
9036 ({ \
9037 uint8x16_t b_ = (b); \
9038 const uint8_t * a_ = (a); \
9039 uint8x16_t result; \
9040 __asm__ ("ld1 {%0.b}[%1], %2" \
9041 : "=w"(result) \
9042 : "i"(c), "Utv"(*a_), "0"(b_) \
9043 : /* No clobbers */); \
9044 result; \
9047 #define vld1q_lane_u16(a, b, c) \
9048 __extension__ \
9049 ({ \
9050 uint16x8_t b_ = (b); \
9051 const uint16_t * a_ = (a); \
9052 uint16x8_t result; \
9053 __asm__ ("ld1 {%0.h}[%1], %2" \
9054 : "=w"(result) \
9055 : "i"(c), "Utv"(*a_), "0"(b_) \
9056 : /* No clobbers */); \
9057 result; \
9060 #define vld1q_lane_u32(a, b, c) \
9061 __extension__ \
9062 ({ \
9063 uint32x4_t b_ = (b); \
9064 const uint32_t * a_ = (a); \
9065 uint32x4_t result; \
9066 __asm__ ("ld1 {%0.s}[%1], %2" \
9067 : "=w"(result) \
9068 : "i"(c), "Utv"(*a_), "0"(b_) \
9069 : /* No clobbers */); \
9070 result; \
9073 #define vld1q_lane_u64(a, b, c) \
9074 __extension__ \
9075 ({ \
9076 uint64x2_t b_ = (b); \
9077 const uint64_t * a_ = (a); \
9078 uint64x2_t result; \
9079 __asm__ ("ld1 {%0.d}[%1], %2" \
9080 : "=w"(result) \
9081 : "i"(c), "Utv"(*a_), "0"(b_) \
9082 : /* No clobbers */); \
9083 result; \
/* vld1q_p8: load sixteen 8-bit poly elements as one 128-bit vector.
   .16b is the correct arrangement for 8-bit lanes.  */
9086 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9087 vld1q_p8 (const poly8_t * a)
9089 poly8x16_t result;
9090 __asm__ ("ld1 {%0.16b}, %1"
9091 : "=w"(result)
9092 : "Utv"(({const poly8x16_t *_a = (poly8x16_t *) a; *_a;}))
9093 : /* No clobbers */);
9094 return result;
9097 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9098 vld1q_p16 (const poly16_t * a)
9100 poly16x8_t result;
9101 __asm__ ("ld1 {%0.16b}, %1"
9102 : "=w"(result)
9103 : "Utv"(({const poly16x8_t *_a = (poly16x8_t *) a; *_a;}))
9104 : /* No clobbers */);
9105 return result;
/* vld1q_<s,u><8,16,32,64>: load a full 128-bit integer vector; the
   arrangement specifier (.16b/.8h/.4s/.2d) matches the element size.  */
9108 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9109 vld1q_s8 (const int8_t * a)
9111 int8x16_t result;
9112 __asm__ ("ld1 {%0.16b}, %1"
9113 : "=w"(result)
9114 : "Utv"(({const int8x16_t *_a = (int8x16_t *) a; *_a;}))
9115 : /* No clobbers */);
9116 return result;
9119 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9120 vld1q_s16 (const int16_t * a)
9122 int16x8_t result;
9123 __asm__ ("ld1 {%0.8h}, %1"
9124 : "=w"(result)
9125 : "Utv"(({const int16x8_t *_a = (int16x8_t *) a; *_a;}))
9126 : /* No clobbers */);
9127 return result;
9130 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9131 vld1q_s32 (const int32_t * a)
9133 int32x4_t result;
9134 __asm__ ("ld1 {%0.4s}, %1"
9135 : "=w"(result)
9136 : "Utv"(({const int32x4_t *_a = (int32x4_t *) a; *_a;}))
9137 : /* No clobbers */);
9138 return result;
9141 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9142 vld1q_s64 (const int64_t * a)
9144 int64x2_t result;
9145 __asm__ ("ld1 {%0.2d}, %1"
9146 : "=w"(result)
9147 : "Utv"(({const int64x2_t *_a = (int64x2_t *) a; *_a;}))
9148 : /* No clobbers */);
9149 return result;
9152 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9153 vld1q_u8 (const uint8_t * a)
9155 uint8x16_t result;
9156 __asm__ ("ld1 {%0.16b}, %1"
9157 : "=w"(result)
9158 : "Utv"(({const uint8x16_t *_a = (uint8x16_t *) a; *_a;}))
9159 : /* No clobbers */);
9160 return result;
9163 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9164 vld1q_u16 (const uint16_t * a)
9166 uint16x8_t result;
9167 __asm__ ("ld1 {%0.8h}, %1"
9168 : "=w"(result)
9169 : "Utv"(({const uint16x8_t *_a = (uint16x8_t *) a; *_a;}))
9170 : /* No clobbers */);
9171 return result;
9174 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9175 vld1q_u32 (const uint32_t * a)
9177 uint32x4_t result;
9178 __asm__ ("ld1 {%0.4s}, %1"
9179 : "=w"(result)
9180 : "Utv"(({const uint32x4_t *_a = (uint32x4_t *) a; *_a;}))
9181 : /* No clobbers */);
9182 return result;
9185 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9186 vld1q_u64 (const uint64_t * a)
9188 uint64x2_t result;
9189 __asm__ ("ld1 {%0.2d}, %1"
9190 : "=w"(result)
9191 : "Utv"(({const uint64x2_t *_a = (uint64x2_t *) a; *_a;}))
9192 : /* No clobbers */);
9193 return result;
/* vmaxnm<q>_f<32,64>: element-wise FMAXNM — IEEE 754-2008 maxNum
   semantics, i.e. if one operand is a quiet NaN the other (numeric)
   operand is returned.  */
9196 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9197 vmaxnm_f32 (float32x2_t a, float32x2_t b)
9199 float32x2_t result;
9200 __asm__ ("fmaxnm %0.2s,%1.2s,%2.2s"
9201 : "=w"(result)
9202 : "w"(a), "w"(b)
9203 : /* No clobbers */);
9204 return result;
9207 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9208 vmaxnmq_f32 (float32x4_t a, float32x4_t b)
9210 float32x4_t result;
9211 __asm__ ("fmaxnm %0.4s,%1.4s,%2.4s"
9212 : "=w"(result)
9213 : "w"(a), "w"(b)
9214 : /* No clobbers */);
9215 return result;
9218 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9219 vmaxnmq_f64 (float64x2_t a, float64x2_t b)
9221 float64x2_t result;
9222 __asm__ ("fmaxnm %0.2d,%1.2d,%2.2d"
9223 : "=w"(result)
9224 : "w"(a), "w"(b)
9225 : /* No clobbers */);
9226 return result;
/* Across-lanes maximum reductions: FMAXNMV/FMAXV/SMAXV/UMAXV reduce a
   whole vector to one scalar, returned in the low element (%b0/%h0/%s0
   select the byte/half/single view of the destination register).  */
9229 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9230 vmaxnmvq_f32 (float32x4_t a)
9232 float32_t result;
9233 __asm__ ("fmaxnmv %s0,%1.4s"
9234 : "=w"(result)
9235 : "w"(a)
9236 : /* No clobbers */);
9237 return result;
9240 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
9241 vmaxv_s8 (int8x8_t a)
9243 int8_t result;
9244 __asm__ ("smaxv %b0,%1.8b"
9245 : "=w"(result)
9246 : "w"(a)
9247 : /* No clobbers */);
9248 return result;
9251 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
9252 vmaxv_s16 (int16x4_t a)
9254 int16_t result;
9255 __asm__ ("smaxv %h0,%1.4h"
9256 : "=w"(result)
9257 : "w"(a)
9258 : /* No clobbers */);
9259 return result;
9262 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
9263 vmaxv_u8 (uint8x8_t a)
9265 uint8_t result;
9266 __asm__ ("umaxv %b0,%1.8b"
9267 : "=w"(result)
9268 : "w"(a)
9269 : /* No clobbers */);
9270 return result;
9273 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
9274 vmaxv_u16 (uint16x4_t a)
9276 uint16_t result;
9277 __asm__ ("umaxv %h0,%1.4h"
9278 : "=w"(result)
9279 : "w"(a)
9280 : /* No clobbers */);
9281 return result;
9284 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9285 vmaxvq_f32 (float32x4_t a)
9287 float32_t result;
9288 __asm__ ("fmaxv %s0,%1.4s"
9289 : "=w"(result)
9290 : "w"(a)
9291 : /* No clobbers */);
9292 return result;
9295 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
9296 vmaxvq_s8 (int8x16_t a)
9298 int8_t result;
9299 __asm__ ("smaxv %b0,%1.16b"
9300 : "=w"(result)
9301 : "w"(a)
9302 : /* No clobbers */);
9303 return result;
9306 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
9307 vmaxvq_s16 (int16x8_t a)
9309 int16_t result;
9310 __asm__ ("smaxv %h0,%1.8h"
9311 : "=w"(result)
9312 : "w"(a)
9313 : /* No clobbers */);
9314 return result;
9317 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
9318 vmaxvq_s32 (int32x4_t a)
9320 int32_t result;
9321 __asm__ ("smaxv %s0,%1.4s"
9322 : "=w"(result)
9323 : "w"(a)
9324 : /* No clobbers */);
9325 return result;
9328 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
9329 vmaxvq_u8 (uint8x16_t a)
9331 uint8_t result;
9332 __asm__ ("umaxv %b0,%1.16b"
9333 : "=w"(result)
9334 : "w"(a)
9335 : /* No clobbers */);
9336 return result;
9339 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
9340 vmaxvq_u16 (uint16x8_t a)
9342 uint16_t result;
9343 __asm__ ("umaxv %h0,%1.8h"
9344 : "=w"(result)
9345 : "w"(a)
9346 : /* No clobbers */);
9347 return result;
9350 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9351 vmaxvq_u32 (uint32x4_t a)
9353 uint32_t result;
9354 __asm__ ("umaxv %s0,%1.4s"
9355 : "=w"(result)
9356 : "w"(a)
9357 : /* No clobbers */);
9358 return result;
/* Across-lanes minimum reductions: FMINNMV/FMINV/SMINV/UMINV — the
   mirror images of the vmaxv family above.  */
9361 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9362 vminnmvq_f32 (float32x4_t a)
9364 float32_t result;
9365 __asm__ ("fminnmv %s0,%1.4s"
9366 : "=w"(result)
9367 : "w"(a)
9368 : /* No clobbers */);
9369 return result;
9372 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
9373 vminv_s8 (int8x8_t a)
9375 int8_t result;
9376 __asm__ ("sminv %b0,%1.8b"
9377 : "=w"(result)
9378 : "w"(a)
9379 : /* No clobbers */);
9380 return result;
9383 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
9384 vminv_s16 (int16x4_t a)
9386 int16_t result;
9387 __asm__ ("sminv %h0,%1.4h"
9388 : "=w"(result)
9389 : "w"(a)
9390 : /* No clobbers */);
9391 return result;
9394 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
9395 vminv_u8 (uint8x8_t a)
9397 uint8_t result;
9398 __asm__ ("uminv %b0,%1.8b"
9399 : "=w"(result)
9400 : "w"(a)
9401 : /* No clobbers */);
9402 return result;
9405 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
9406 vminv_u16 (uint16x4_t a)
9408 uint16_t result;
9409 __asm__ ("uminv %h0,%1.4h"
9410 : "=w"(result)
9411 : "w"(a)
9412 : /* No clobbers */);
9413 return result;
9416 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9417 vminvq_f32 (float32x4_t a)
9419 float32_t result;
9420 __asm__ ("fminv %s0,%1.4s"
9421 : "=w"(result)
9422 : "w"(a)
9423 : /* No clobbers */);
9424 return result;
9427 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
9428 vminvq_s8 (int8x16_t a)
9430 int8_t result;
9431 __asm__ ("sminv %b0,%1.16b"
9432 : "=w"(result)
9433 : "w"(a)
9434 : /* No clobbers */);
9435 return result;
9438 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
9439 vminvq_s16 (int16x8_t a)
9441 int16_t result;
9442 __asm__ ("sminv %h0,%1.8h"
9443 : "=w"(result)
9444 : "w"(a)
9445 : /* No clobbers */);
9446 return result;
9449 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
9450 vminvq_s32 (int32x4_t a)
9452 int32_t result;
9453 __asm__ ("sminv %s0,%1.4s"
9454 : "=w"(result)
9455 : "w"(a)
9456 : /* No clobbers */);
9457 return result;
9460 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
9461 vminvq_u8 (uint8x16_t a)
9463 uint8_t result;
9464 __asm__ ("uminv %b0,%1.16b"
9465 : "=w"(result)
9466 : "w"(a)
9467 : /* No clobbers */);
9468 return result;
9471 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
9472 vminvq_u16 (uint16x8_t a)
9474 uint16_t result;
9475 __asm__ ("uminv %h0,%1.8h"
9476 : "=w"(result)
9477 : "w"(a)
9478 : /* No clobbers */);
9479 return result;
9482 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
9483 vminvq_u32 (uint32x4_t a)
9485 uint32_t result;
9486 __asm__ ("uminv %s0,%1.4s"
9487 : "=w"(result)
9488 : "w"(a)
9489 : /* No clobbers */);
9490 return result;
/* vmla_lane<q>_<type>(a, b, c, d): multiply-accumulate by one lane —
   a + b * c[d].  Integer forms map to a single MLA; the f32 form is
   synthesised as FMUL + FADD (two instructions, so NOT fused — rounding
   matches separate multiply-then-add).  _laneq variants take the lane
   from a 128-bit vector c.  Macros because lane index d must be an "i"
   immediate.  */
9493 #define vmla_lane_f32(a, b, c, d) \
9494 __extension__ \
9495 ({ \
9496 float32x2_t c_ = (c); \
9497 float32x2_t b_ = (b); \
9498 float32x2_t a_ = (a); \
9499 float32x2_t result; \
9500 float32x2_t t1; \
9501 __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \
9502 : "=w"(result), "=w"(t1) \
9503 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9504 : /* No clobbers */); \
9505 result; \
9508 #define vmla_lane_s16(a, b, c, d) \
9509 __extension__ \
9510 ({ \
9511 int16x4_t c_ = (c); \
9512 int16x4_t b_ = (b); \
9513 int16x4_t a_ = (a); \
9514 int16x4_t result; \
9515 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9516 : "=w"(result) \
9517 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9518 : /* No clobbers */); \
9519 result; \
9522 #define vmla_lane_s32(a, b, c, d) \
9523 __extension__ \
9524 ({ \
9525 int32x2_t c_ = (c); \
9526 int32x2_t b_ = (b); \
9527 int32x2_t a_ = (a); \
9528 int32x2_t result; \
9529 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9530 : "=w"(result) \
9531 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9532 : /* No clobbers */); \
9533 result; \
9536 #define vmla_lane_u16(a, b, c, d) \
9537 __extension__ \
9538 ({ \
9539 uint16x4_t c_ = (c); \
9540 uint16x4_t b_ = (b); \
9541 uint16x4_t a_ = (a); \
9542 uint16x4_t result; \
9543 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9544 : "=w"(result) \
9545 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9546 : /* No clobbers */); \
9547 result; \
9550 #define vmla_lane_u32(a, b, c, d) \
9551 __extension__ \
9552 ({ \
9553 uint32x2_t c_ = (c); \
9554 uint32x2_t b_ = (b); \
9555 uint32x2_t a_ = (a); \
9556 uint32x2_t result; \
9557 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9558 : "=w"(result) \
9559 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9560 : /* No clobbers */); \
9561 result; \
9564 #define vmla_laneq_s16(a, b, c, d) \
9565 __extension__ \
9566 ({ \
9567 int16x8_t c_ = (c); \
9568 int16x4_t b_ = (b); \
9569 int16x4_t a_ = (a); \
9570 int16x4_t result; \
9571 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9572 : "=w"(result) \
9573 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9574 : /* No clobbers */); \
9575 result; \
9578 #define vmla_laneq_s32(a, b, c, d) \
9579 __extension__ \
9580 ({ \
9581 int32x4_t c_ = (c); \
9582 int32x2_t b_ = (b); \
9583 int32x2_t a_ = (a); \
9584 int32x2_t result; \
9585 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9586 : "=w"(result) \
9587 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9588 : /* No clobbers */); \
9589 result; \
9592 #define vmla_laneq_u16(a, b, c, d) \
9593 __extension__ \
9594 ({ \
9595 uint16x8_t c_ = (c); \
9596 uint16x4_t b_ = (b); \
9597 uint16x4_t a_ = (a); \
9598 uint16x4_t result; \
9599 __asm__ ("mla %0.4h, %2.4h, %3.h[%4]" \
9600 : "=w"(result) \
9601 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9602 : /* No clobbers */); \
9603 result; \
9606 #define vmla_laneq_u32(a, b, c, d) \
9607 __extension__ \
9608 ({ \
9609 uint32x4_t c_ = (c); \
9610 uint32x2_t b_ = (b); \
9611 uint32x2_t a_ = (a); \
9612 uint32x2_t result; \
9613 __asm__ ("mla %0.2s, %2.2s, %3.s[%4]" \
9614 : "=w"(result) \
9615 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9616 : /* No clobbers */); \
9617 result; \
/* vmla_n_<type>(a, b, c): a + b * c with scalar c broadcast via the
   by-element form ([0] lane of the register holding c); vmla_<type> is
   the plain vector-by-vector multiply-accumulate.  "0"(a) ties the
   accumulator input to the MLA destination.  */
9620 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9621 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
9623 float32x2_t result;
9624 float32x2_t t1;
9625 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
9626 : "=w"(result), "=w"(t1)
9627 : "0"(a), "w"(b), "w"(c)
9628 : /* No clobbers */);
9629 return result;
9632 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9633 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
9635 int16x4_t result;
9636 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
9637 : "=w"(result)
9638 : "0"(a), "w"(b), "w"(c)
9639 : /* No clobbers */);
9640 return result;
9643 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9644 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
9646 int32x2_t result;
9647 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
9648 : "=w"(result)
9649 : "0"(a), "w"(b), "w"(c)
9650 : /* No clobbers */);
9651 return result;
9654 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9655 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
9657 uint16x4_t result;
9658 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
9659 : "=w"(result)
9660 : "0"(a), "w"(b), "w"(c)
9661 : /* No clobbers */);
9662 return result;
9665 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9666 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
9668 uint32x2_t result;
9669 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
9670 : "=w"(result)
9671 : "0"(a), "w"(b), "w"(c)
9672 : /* No clobbers */);
9673 return result;
/* Vector-by-vector forms: result lane i = a[i] + b[i] * c[i].  */
9676 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9677 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
9679 int8x8_t result;
9680 __asm__ ("mla %0.8b, %2.8b, %3.8b"
9681 : "=w"(result)
9682 : "0"(a), "w"(b), "w"(c)
9683 : /* No clobbers */);
9684 return result;
9687 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9688 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
9690 int16x4_t result;
9691 __asm__ ("mla %0.4h, %2.4h, %3.4h"
9692 : "=w"(result)
9693 : "0"(a), "w"(b), "w"(c)
9694 : /* No clobbers */);
9695 return result;
9698 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9699 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
9701 int32x2_t result;
9702 __asm__ ("mla %0.2s, %2.2s, %3.2s"
9703 : "=w"(result)
9704 : "0"(a), "w"(b), "w"(c)
9705 : /* No clobbers */);
9706 return result;
9709 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9710 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
9712 uint8x8_t result;
9713 __asm__ ("mla %0.8b, %2.8b, %3.8b"
9714 : "=w"(result)
9715 : "0"(a), "w"(b), "w"(c)
9716 : /* No clobbers */);
9717 return result;
9720 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9721 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
9723 uint16x4_t result;
9724 __asm__ ("mla %0.4h, %2.4h, %3.4h"
9725 : "=w"(result)
9726 : "0"(a), "w"(b), "w"(c)
9727 : /* No clobbers */);
9728 return result;
9731 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9732 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
9734 uint32x2_t result;
9735 __asm__ ("mla %0.2s, %2.2s, %3.2s"
9736 : "=w"(result)
9737 : "0"(a), "w"(b), "w"(c)
9738 : /* No clobbers */);
9739 return result;
/* vmlal_high_lane<q>_<type>(a, b, c, d): widening multiply-accumulate
   on the HIGH half of b — SMLAL2/UMLAL2 takes the upper elements of b,
   multiplies by lane d of c, and accumulates into the wider vector a.
   Macros because lane index d must be an "i" immediate.
   NOTE(review): the _lane (c is 64-bit vector per ACLE) and _laneq
   (c is 128-bit) variants here both declare c_ as the 128-bit type and
   emit identical asm — presumably the _lane forms were written against
   the q-type; verify against the ACLE signatures before relying on
   lane-index range checking.  */
9742 #define vmlal_high_lane_s16(a, b, c, d) \
9743 __extension__ \
9744 ({ \
9745 int16x8_t c_ = (c); \
9746 int16x8_t b_ = (b); \
9747 int32x4_t a_ = (a); \
9748 int32x4_t result; \
9749 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
9750 : "=w"(result) \
9751 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9752 : /* No clobbers */); \
9753 result; \
9756 #define vmlal_high_lane_s32(a, b, c, d) \
9757 __extension__ \
9758 ({ \
9759 int32x4_t c_ = (c); \
9760 int32x4_t b_ = (b); \
9761 int64x2_t a_ = (a); \
9762 int64x2_t result; \
9763 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
9764 : "=w"(result) \
9765 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9766 : /* No clobbers */); \
9767 result; \
9770 #define vmlal_high_lane_u16(a, b, c, d) \
9771 __extension__ \
9772 ({ \
9773 uint16x8_t c_ = (c); \
9774 uint16x8_t b_ = (b); \
9775 uint32x4_t a_ = (a); \
9776 uint32x4_t result; \
9777 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
9778 : "=w"(result) \
9779 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9780 : /* No clobbers */); \
9781 result; \
9784 #define vmlal_high_lane_u32(a, b, c, d) \
9785 __extension__ \
9786 ({ \
9787 uint32x4_t c_ = (c); \
9788 uint32x4_t b_ = (b); \
9789 uint64x2_t a_ = (a); \
9790 uint64x2_t result; \
9791 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
9792 : "=w"(result) \
9793 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9794 : /* No clobbers */); \
9795 result; \
9798 #define vmlal_high_laneq_s16(a, b, c, d) \
9799 __extension__ \
9800 ({ \
9801 int16x8_t c_ = (c); \
9802 int16x8_t b_ = (b); \
9803 int32x4_t a_ = (a); \
9804 int32x4_t result; \
9805 __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \
9806 : "=w"(result) \
9807 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9808 : /* No clobbers */); \
9809 result; \
9812 #define vmlal_high_laneq_s32(a, b, c, d) \
9813 __extension__ \
9814 ({ \
9815 int32x4_t c_ = (c); \
9816 int32x4_t b_ = (b); \
9817 int64x2_t a_ = (a); \
9818 int64x2_t result; \
9819 __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \
9820 : "=w"(result) \
9821 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9822 : /* No clobbers */); \
9823 result; \
9826 #define vmlal_high_laneq_u16(a, b, c, d) \
9827 __extension__ \
9828 ({ \
9829 uint16x8_t c_ = (c); \
9830 uint16x8_t b_ = (b); \
9831 uint32x4_t a_ = (a); \
9832 uint32x4_t result; \
9833 __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \
9834 : "=w"(result) \
9835 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9836 : /* No clobbers */); \
9837 result; \
9840 #define vmlal_high_laneq_u32(a, b, c, d) \
9841 __extension__ \
9842 ({ \
9843 uint32x4_t c_ = (c); \
9844 uint32x4_t b_ = (b); \
9845 uint64x2_t a_ = (a); \
9846 uint64x2_t result; \
9847 __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \
9848 : "=w"(result) \
9849 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9850 : /* No clobbers */); \
9851 result; \
/* vmlal_high_n_<type>: SMLAL2/UMLAL2 with the scalar c broadcast via
   the by-element [0] form; vmlal_high_s<8,16,32> are the full
   vector-by-vector widening multiply-accumulates on the high halves.  */
9854 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9855 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
9857 int32x4_t result;
9858 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
9859 : "=w"(result)
9860 : "0"(a), "w"(b), "w"(c)
9861 : /* No clobbers */);
9862 return result;
9865 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9866 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
9868 int64x2_t result;
9869 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
9870 : "=w"(result)
9871 : "0"(a), "w"(b), "w"(c)
9872 : /* No clobbers */);
9873 return result;
9876 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9877 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
9879 uint32x4_t result;
9880 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
9881 : "=w"(result)
9882 : "0"(a), "w"(b), "w"(c)
9883 : /* No clobbers */);
9884 return result;
9887 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9888 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
9890 uint64x2_t result;
9891 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
9892 : "=w"(result)
9893 : "0"(a), "w"(b), "w"(c)
9894 : /* No clobbers */);
9895 return result;
9898 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9899 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
9901 int16x8_t result;
9902 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
9903 : "=w"(result)
9904 : "0"(a), "w"(b), "w"(c)
9905 : /* No clobbers */);
9906 return result;
9909 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9910 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
9912 int32x4_t result;
9913 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
9914 : "=w"(result)
9915 : "0"(a), "w"(b), "w"(c)
9916 : /* No clobbers */);
9917 return result;
9920 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9921 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
9923 int64x2_t result;
9924 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
9925 : "=w"(result)
9926 : "0"(a), "w"(b), "w"(c)
9927 : /* No clobbers */);
9928 return result;
9931 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9932 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
9934 uint16x8_t result;
9935 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
9936 : "=w"(result)
9937 : "0"(a), "w"(b), "w"(c)
9938 : /* No clobbers */);
9939 return result;
9942 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9943 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
9945 uint32x4_t result;
9946 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
9947 : "=w"(result)
9948 : "0"(a), "w"(b), "w"(c)
9949 : /* No clobbers */);
9950 return result;
9953 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9954 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
9956 uint64x2_t result;
9957 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
9958 : "=w"(result)
9959 : "0"(a), "w"(b), "w"(c)
9960 : /* No clobbers */);
9961 return result;
9964 #define vmlal_lane_s16(a, b, c, d) \
9965 __extension__ \
9966 ({ \
9967 int16x4_t c_ = (c); \
9968 int16x4_t b_ = (b); \
9969 int32x4_t a_ = (a); \
9970 int32x4_t result; \
9971 __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \
9972 : "=w"(result) \
9973 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9974 : /* No clobbers */); \
9975 result; \
9978 #define vmlal_lane_s32(a, b, c, d) \
9979 __extension__ \
9980 ({ \
9981 int32x2_t c_ = (c); \
9982 int32x2_t b_ = (b); \
9983 int64x2_t a_ = (a); \
9984 int64x2_t result; \
9985 __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \
9986 : "=w"(result) \
9987 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9988 : /* No clobbers */); \
9989 result; \
9992 #define vmlal_lane_u16(a, b, c, d) \
9993 __extension__ \
9994 ({ \
9995 uint16x4_t c_ = (c); \
9996 uint16x4_t b_ = (b); \
9997 uint32x4_t a_ = (a); \
9998 uint32x4_t result; \
9999 __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \
10000 : "=w"(result) \
10001 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10002 : /* No clobbers */); \
10003 result; \
10006 #define vmlal_lane_u32(a, b, c, d) \
10007 __extension__ \
10008 ({ \
10009 uint32x2_t c_ = (c); \
10010 uint32x2_t b_ = (b); \
10011 uint64x2_t a_ = (a); \
10012 uint64x2_t result; \
10013 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
10014 : "=w"(result) \
10015 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10016 : /* No clobbers */); \
10017 result; \
10020 #define vmlal_laneq_s16(a, b, c, d) \
10021 __extension__ \
10022 ({ \
10023 int16x8_t c_ = (c); \
10024 int16x4_t b_ = (b); \
10025 int32x4_t a_ = (a); \
10026 int32x4_t result; \
10027 __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \
10028 : "=w"(result) \
10029 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10030 : /* No clobbers */); \
10031 result; \
10034 #define vmlal_laneq_s32(a, b, c, d) \
10035 __extension__ \
10036 ({ \
10037 int32x4_t c_ = (c); \
10038 int32x2_t b_ = (b); \
10039 int64x2_t a_ = (a); \
10040 int64x2_t result; \
10041 __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \
10042 : "=w"(result) \
10043 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10044 : /* No clobbers */); \
10045 result; \
10048 #define vmlal_laneq_u16(a, b, c, d) \
10049 __extension__ \
10050 ({ \
10051 uint16x8_t c_ = (c); \
10052 uint16x4_t b_ = (b); \
10053 uint32x4_t a_ = (a); \
10054 uint32x4_t result; \
10055 __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \
10056 : "=w"(result) \
10057 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10058 : /* No clobbers */); \
10059 result; \
10062 #define vmlal_laneq_u32(a, b, c, d) \
10063 __extension__ \
10064 ({ \
10065 uint32x4_t c_ = (c); \
10066 uint32x2_t b_ = (b); \
10067 uint64x2_t a_ = (a); \
10068 uint64x2_t result; \
10069 __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \
10070 : "=w"(result) \
10071 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10072 : /* No clobbers */); \
10073 result; \
10076 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10077 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
10079 int32x4_t result;
10080 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
10081 : "=w"(result)
10082 : "0"(a), "w"(b), "w"(c)
10083 : /* No clobbers */);
10084 return result;
10087 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10088 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
10090 int64x2_t result;
10091 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
10092 : "=w"(result)
10093 : "0"(a), "w"(b), "w"(c)
10094 : /* No clobbers */);
10095 return result;
10098 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10099 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
10101 uint32x4_t result;
10102 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
10103 : "=w"(result)
10104 : "0"(a), "w"(b), "w"(c)
10105 : /* No clobbers */);
10106 return result;
10109 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10110 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
10112 uint64x2_t result;
10113 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
10114 : "=w"(result)
10115 : "0"(a), "w"(b), "w"(c)
10116 : /* No clobbers */);
10117 return result;
10120 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10121 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
10123 int16x8_t result;
10124 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
10125 : "=w"(result)
10126 : "0"(a), "w"(b), "w"(c)
10127 : /* No clobbers */);
10128 return result;
10131 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10132 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
10134 int32x4_t result;
10135 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
10136 : "=w"(result)
10137 : "0"(a), "w"(b), "w"(c)
10138 : /* No clobbers */);
10139 return result;
10142 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10143 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
10145 int64x2_t result;
10146 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
10147 : "=w"(result)
10148 : "0"(a), "w"(b), "w"(c)
10149 : /* No clobbers */);
10150 return result;
10153 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10154 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
10156 uint16x8_t result;
10157 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
10158 : "=w"(result)
10159 : "0"(a), "w"(b), "w"(c)
10160 : /* No clobbers */);
10161 return result;
10164 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10165 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
10167 uint32x4_t result;
10168 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
10169 : "=w"(result)
10170 : "0"(a), "w"(b), "w"(c)
10171 : /* No clobbers */);
10172 return result;
10175 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10176 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
10178 uint64x2_t result;
10179 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
10180 : "=w"(result)
10181 : "0"(a), "w"(b), "w"(c)
10182 : /* No clobbers */);
10183 return result;
10186 #define vmlaq_lane_f32(a, b, c, d) \
10187 __extension__ \
10188 ({ \
10189 float32x4_t c_ = (c); \
10190 float32x4_t b_ = (b); \
10191 float32x4_t a_ = (a); \
10192 float32x4_t result; \
10193 float32x4_t t1; \
10194 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \
10195 : "=w"(result), "=w"(t1) \
10196 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10197 : /* No clobbers */); \
10198 result; \
10201 #define vmlaq_lane_s16(a, b, c, d) \
10202 __extension__ \
10203 ({ \
10204 int16x8_t c_ = (c); \
10205 int16x8_t b_ = (b); \
10206 int16x8_t a_ = (a); \
10207 int16x8_t result; \
10208 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10209 : "=w"(result) \
10210 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10211 : /* No clobbers */); \
10212 result; \
10215 #define vmlaq_lane_s32(a, b, c, d) \
10216 __extension__ \
10217 ({ \
10218 int32x4_t c_ = (c); \
10219 int32x4_t b_ = (b); \
10220 int32x4_t a_ = (a); \
10221 int32x4_t result; \
10222 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10223 : "=w"(result) \
10224 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10225 : /* No clobbers */); \
10226 result; \
10229 #define vmlaq_lane_u16(a, b, c, d) \
10230 __extension__ \
10231 ({ \
10232 uint16x8_t c_ = (c); \
10233 uint16x8_t b_ = (b); \
10234 uint16x8_t a_ = (a); \
10235 uint16x8_t result; \
10236 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10237 : "=w"(result) \
10238 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10239 : /* No clobbers */); \
10240 result; \
10243 #define vmlaq_lane_u32(a, b, c, d) \
10244 __extension__ \
10245 ({ \
10246 uint32x4_t c_ = (c); \
10247 uint32x4_t b_ = (b); \
10248 uint32x4_t a_ = (a); \
10249 uint32x4_t result; \
10250 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10251 : "=w"(result) \
10252 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10253 : /* No clobbers */); \
10254 result; \
10257 #define vmlaq_laneq_s16(a, b, c, d) \
10258 __extension__ \
10259 ({ \
10260 int16x8_t c_ = (c); \
10261 int16x8_t b_ = (b); \
10262 int16x8_t a_ = (a); \
10263 int16x8_t result; \
10264 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10265 : "=w"(result) \
10266 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10267 : /* No clobbers */); \
10268 result; \
10271 #define vmlaq_laneq_s32(a, b, c, d) \
10272 __extension__ \
10273 ({ \
10274 int32x4_t c_ = (c); \
10275 int32x4_t b_ = (b); \
10276 int32x4_t a_ = (a); \
10277 int32x4_t result; \
10278 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10279 : "=w"(result) \
10280 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10281 : /* No clobbers */); \
10282 result; \
10285 #define vmlaq_laneq_u16(a, b, c, d) \
10286 __extension__ \
10287 ({ \
10288 uint16x8_t c_ = (c); \
10289 uint16x8_t b_ = (b); \
10290 uint16x8_t a_ = (a); \
10291 uint16x8_t result; \
10292 __asm__ ("mla %0.8h, %2.8h, %3.h[%4]" \
10293 : "=w"(result) \
10294 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10295 : /* No clobbers */); \
10296 result; \
10299 #define vmlaq_laneq_u32(a, b, c, d) \
10300 __extension__ \
10301 ({ \
10302 uint32x4_t c_ = (c); \
10303 uint32x4_t b_ = (b); \
10304 uint32x4_t a_ = (a); \
10305 uint32x4_t result; \
10306 __asm__ ("mla %0.4s, %2.4s, %3.s[%4]" \
10307 : "=w"(result) \
10308 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10309 : /* No clobbers */); \
10310 result; \
10313 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10314 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
10316 float32x4_t result;
10317 float32x4_t t1;
10318 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
10319 : "=w"(result), "=w"(t1)
10320 : "0"(a), "w"(b), "w"(c)
10321 : /* No clobbers */);
10322 return result;
10325 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10326 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
10328 float64x2_t result;
10329 float64x2_t t1;
10330 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
10331 : "=w"(result), "=w"(t1)
10332 : "0"(a), "w"(b), "w"(c)
10333 : /* No clobbers */);
10334 return result;
10337 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10338 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
10340 int16x8_t result;
10341 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
10342 : "=w"(result)
10343 : "0"(a), "w"(b), "w"(c)
10344 : /* No clobbers */);
10345 return result;
10348 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10349 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
10351 int32x4_t result;
10352 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
10353 : "=w"(result)
10354 : "0"(a), "w"(b), "w"(c)
10355 : /* No clobbers */);
10356 return result;
10359 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10360 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
10362 uint16x8_t result;
10363 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
10364 : "=w"(result)
10365 : "0"(a), "w"(b), "w"(c)
10366 : /* No clobbers */);
10367 return result;
10370 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10371 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
10373 uint32x4_t result;
10374 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
10375 : "=w"(result)
10376 : "0"(a), "w"(b), "w"(c)
10377 : /* No clobbers */);
10378 return result;
10381 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10382 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
10384 int8x16_t result;
10385 __asm__ ("mla %0.16b, %2.16b, %3.16b"
10386 : "=w"(result)
10387 : "0"(a), "w"(b), "w"(c)
10388 : /* No clobbers */);
10389 return result;
10392 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10393 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
10395 int16x8_t result;
10396 __asm__ ("mla %0.8h, %2.8h, %3.8h"
10397 : "=w"(result)
10398 : "0"(a), "w"(b), "w"(c)
10399 : /* No clobbers */);
10400 return result;
10403 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10404 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
10406 int32x4_t result;
10407 __asm__ ("mla %0.4s, %2.4s, %3.4s"
10408 : "=w"(result)
10409 : "0"(a), "w"(b), "w"(c)
10410 : /* No clobbers */);
10411 return result;
10414 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10415 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
10417 uint8x16_t result;
10418 __asm__ ("mla %0.16b, %2.16b, %3.16b"
10419 : "=w"(result)
10420 : "0"(a), "w"(b), "w"(c)
10421 : /* No clobbers */);
10422 return result;
10425 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10426 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
10428 uint16x8_t result;
10429 __asm__ ("mla %0.8h, %2.8h, %3.8h"
10430 : "=w"(result)
10431 : "0"(a), "w"(b), "w"(c)
10432 : /* No clobbers */);
10433 return result;
10436 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10437 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
10439 uint32x4_t result;
10440 __asm__ ("mla %0.4s, %2.4s, %3.4s"
10441 : "=w"(result)
10442 : "0"(a), "w"(b), "w"(c)
10443 : /* No clobbers */);
10444 return result;
10447 #define vmls_lane_f32(a, b, c, d) \
10448 __extension__ \
10449 ({ \
10450 float32x2_t c_ = (c); \
10451 float32x2_t b_ = (b); \
10452 float32x2_t a_ = (a); \
10453 float32x2_t result; \
10454 float32x2_t t1; \
10455 __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \
10456 : "=w"(result), "=w"(t1) \
10457 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10458 : /* No clobbers */); \
10459 result; \
10462 #define vmls_lane_s16(a, b, c, d) \
10463 __extension__ \
10464 ({ \
10465 int16x4_t c_ = (c); \
10466 int16x4_t b_ = (b); \
10467 int16x4_t a_ = (a); \
10468 int16x4_t result; \
10469 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
10470 : "=w"(result) \
10471 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10472 : /* No clobbers */); \
10473 result; \
10476 #define vmls_lane_s32(a, b, c, d) \
10477 __extension__ \
10478 ({ \
10479 int32x2_t c_ = (c); \
10480 int32x2_t b_ = (b); \
10481 int32x2_t a_ = (a); \
10482 int32x2_t result; \
10483 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
10484 : "=w"(result) \
10485 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10486 : /* No clobbers */); \
10487 result; \
10490 #define vmls_lane_u16(a, b, c, d) \
10491 __extension__ \
10492 ({ \
10493 uint16x4_t c_ = (c); \
10494 uint16x4_t b_ = (b); \
10495 uint16x4_t a_ = (a); \
10496 uint16x4_t result; \
10497 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
10498 : "=w"(result) \
10499 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10500 : /* No clobbers */); \
10501 result; \
10504 #define vmls_lane_u32(a, b, c, d) \
10505 __extension__ \
10506 ({ \
10507 uint32x2_t c_ = (c); \
10508 uint32x2_t b_ = (b); \
10509 uint32x2_t a_ = (a); \
10510 uint32x2_t result; \
10511 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
10512 : "=w"(result) \
10513 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10514 : /* No clobbers */); \
10515 result; \
10518 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10519 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
10521 float32x2_t result;
10522 float32x2_t t1;
10523 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
10524 : "=w"(result), "=w"(t1)
10525 : "0"(a), "w"(b), "w"(c)
10526 : /* No clobbers */);
10527 return result;
10530 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10531 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
10533 int16x4_t result;
10534 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
10535 : "=w"(result)
10536 : "0"(a), "w"(b), "w"(c)
10537 : /* No clobbers */);
10538 return result;
10541 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10542 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
10544 int32x2_t result;
10545 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
10546 : "=w"(result)
10547 : "0"(a), "w"(b), "w"(c)
10548 : /* No clobbers */);
10549 return result;
10552 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10553 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
10555 uint16x4_t result;
10556 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
10557 : "=w"(result)
10558 : "0"(a), "w"(b), "w"(c)
10559 : /* No clobbers */);
10560 return result;
10563 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10564 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
10566 uint32x2_t result;
10567 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
10568 : "=w"(result)
10569 : "0"(a), "w"(b), "w"(c)
10570 : /* No clobbers */);
10571 return result;
10574 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10575 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
10577 int8x8_t result;
10578 __asm__ ("mls %0.8b,%2.8b,%3.8b"
10579 : "=w"(result)
10580 : "0"(a), "w"(b), "w"(c)
10581 : /* No clobbers */);
10582 return result;
10585 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10586 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
10588 int16x4_t result;
10589 __asm__ ("mls %0.4h,%2.4h,%3.4h"
10590 : "=w"(result)
10591 : "0"(a), "w"(b), "w"(c)
10592 : /* No clobbers */);
10593 return result;
10596 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10597 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
10599 int32x2_t result;
10600 __asm__ ("mls %0.2s,%2.2s,%3.2s"
10601 : "=w"(result)
10602 : "0"(a), "w"(b), "w"(c)
10603 : /* No clobbers */);
10604 return result;
10607 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10608 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
10610 uint8x8_t result;
10611 __asm__ ("mls %0.8b,%2.8b,%3.8b"
10612 : "=w"(result)
10613 : "0"(a), "w"(b), "w"(c)
10614 : /* No clobbers */);
10615 return result;
10618 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10619 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
10621 uint16x4_t result;
10622 __asm__ ("mls %0.4h,%2.4h,%3.4h"
10623 : "=w"(result)
10624 : "0"(a), "w"(b), "w"(c)
10625 : /* No clobbers */);
10626 return result;
10629 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10630 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
10632 uint32x2_t result;
10633 __asm__ ("mls %0.2s,%2.2s,%3.2s"
10634 : "=w"(result)
10635 : "0"(a), "w"(b), "w"(c)
10636 : /* No clobbers */);
10637 return result;
10640 #define vmlsl_high_lane_s16(a, b, c, d) \
10641 __extension__ \
10642 ({ \
10643 int16x8_t c_ = (c); \
10644 int16x8_t b_ = (b); \
10645 int32x4_t a_ = (a); \
10646 int32x4_t result; \
10647 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
10648 : "=w"(result) \
10649 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10650 : /* No clobbers */); \
10651 result; \
10654 #define vmlsl_high_lane_s32(a, b, c, d) \
10655 __extension__ \
10656 ({ \
10657 int32x4_t c_ = (c); \
10658 int32x4_t b_ = (b); \
10659 int64x2_t a_ = (a); \
10660 int64x2_t result; \
10661 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
10662 : "=w"(result) \
10663 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10664 : /* No clobbers */); \
10665 result; \
10668 #define vmlsl_high_lane_u16(a, b, c, d) \
10669 __extension__ \
10670 ({ \
10671 uint16x8_t c_ = (c); \
10672 uint16x8_t b_ = (b); \
10673 uint32x4_t a_ = (a); \
10674 uint32x4_t result; \
10675 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
10676 : "=w"(result) \
10677 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10678 : /* No clobbers */); \
10679 result; \
10682 #define vmlsl_high_lane_u32(a, b, c, d) \
10683 __extension__ \
10684 ({ \
10685 uint32x4_t c_ = (c); \
10686 uint32x4_t b_ = (b); \
10687 uint64x2_t a_ = (a); \
10688 uint64x2_t result; \
10689 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
10690 : "=w"(result) \
10691 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10692 : /* No clobbers */); \
10693 result; \
10696 #define vmlsl_high_laneq_s16(a, b, c, d) \
10697 __extension__ \
10698 ({ \
10699 int16x8_t c_ = (c); \
10700 int16x8_t b_ = (b); \
10701 int32x4_t a_ = (a); \
10702 int32x4_t result; \
10703 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
10704 : "=w"(result) \
10705 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10706 : /* No clobbers */); \
10707 result; \
10710 #define vmlsl_high_laneq_s32(a, b, c, d) \
10711 __extension__ \
10712 ({ \
10713 int32x4_t c_ = (c); \
10714 int32x4_t b_ = (b); \
10715 int64x2_t a_ = (a); \
10716 int64x2_t result; \
10717 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
10718 : "=w"(result) \
10719 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10720 : /* No clobbers */); \
10721 result; \
10724 #define vmlsl_high_laneq_u16(a, b, c, d) \
10725 __extension__ \
10726 ({ \
10727 uint16x8_t c_ = (c); \
10728 uint16x8_t b_ = (b); \
10729 uint32x4_t a_ = (a); \
10730 uint32x4_t result; \
10731 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
10732 : "=w"(result) \
10733 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10734 : /* No clobbers */); \
10735 result; \
10738 #define vmlsl_high_laneq_u32(a, b, c, d) \
10739 __extension__ \
10740 ({ \
10741 uint32x4_t c_ = (c); \
10742 uint32x4_t b_ = (b); \
10743 uint64x2_t a_ = (a); \
10744 uint64x2_t result; \
10745 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
10746 : "=w"(result) \
10747 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10748 : /* No clobbers */); \
10749 result; \
10752 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10753 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
10755 int32x4_t result;
10756 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
10757 : "=w"(result)
10758 : "0"(a), "w"(b), "w"(c)
10759 : /* No clobbers */);
10760 return result;
10763 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10764 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
10766 int64x2_t result;
10767 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
10768 : "=w"(result)
10769 : "0"(a), "w"(b), "w"(c)
10770 : /* No clobbers */);
10771 return result;
10774 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10775 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
10777 uint32x4_t result;
10778 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
10779 : "=w"(result)
10780 : "0"(a), "w"(b), "w"(c)
10781 : /* No clobbers */);
10782 return result;
10785 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10786 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
10788 uint64x2_t result;
10789 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
10790 : "=w"(result)
10791 : "0"(a), "w"(b), "w"(c)
10792 : /* No clobbers */);
10793 return result;
10796 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10797 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
10799 int16x8_t result;
10800 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
10801 : "=w"(result)
10802 : "0"(a), "w"(b), "w"(c)
10803 : /* No clobbers */);
10804 return result;
10807 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10808 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
10810 int32x4_t result;
10811 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
10812 : "=w"(result)
10813 : "0"(a), "w"(b), "w"(c)
10814 : /* No clobbers */);
10815 return result;
10818 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10819 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
10821 int64x2_t result;
10822 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
10823 : "=w"(result)
10824 : "0"(a), "w"(b), "w"(c)
10825 : /* No clobbers */);
10826 return result;
10829 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10830 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
10832 uint16x8_t result;
10833 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
10834 : "=w"(result)
10835 : "0"(a), "w"(b), "w"(c)
10836 : /* No clobbers */);
10837 return result;
10840 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10841 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
10843 uint32x4_t result;
10844 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
10845 : "=w"(result)
10846 : "0"(a), "w"(b), "w"(c)
10847 : /* No clobbers */);
10848 return result;
10851 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10852 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
10854 uint64x2_t result;
10855 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
10856 : "=w"(result)
10857 : "0"(a), "w"(b), "w"(c)
10858 : /* No clobbers */);
10859 return result;
10862 #define vmlsl_lane_s16(a, b, c, d) \
10863 __extension__ \
10864 ({ \
10865 int16x4_t c_ = (c); \
10866 int16x4_t b_ = (b); \
10867 int32x4_t a_ = (a); \
10868 int32x4_t result; \
10869 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
10870 : "=w"(result) \
10871 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10872 : /* No clobbers */); \
10873 result; \
10876 #define vmlsl_lane_s32(a, b, c, d) \
10877 __extension__ \
10878 ({ \
10879 int32x2_t c_ = (c); \
10880 int32x2_t b_ = (b); \
10881 int64x2_t a_ = (a); \
10882 int64x2_t result; \
10883 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
10884 : "=w"(result) \
10885 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10886 : /* No clobbers */); \
10887 result; \
10890 #define vmlsl_lane_u16(a, b, c, d) \
10891 __extension__ \
10892 ({ \
10893 uint16x4_t c_ = (c); \
10894 uint16x4_t b_ = (b); \
10895 uint32x4_t a_ = (a); \
10896 uint32x4_t result; \
10897 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
10898 : "=w"(result) \
10899 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10900 : /* No clobbers */); \
10901 result; \
10904 #define vmlsl_lane_u32(a, b, c, d) \
10905 __extension__ \
10906 ({ \
10907 uint32x2_t c_ = (c); \
10908 uint32x2_t b_ = (b); \
10909 uint64x2_t a_ = (a); \
10910 uint64x2_t result; \
10911 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
10912 : "=w"(result) \
10913 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10914 : /* No clobbers */); \
10915 result; \
10918 #define vmlsl_laneq_s16(a, b, c, d) \
10919 __extension__ \
10920 ({ \
10921 int16x8_t c_ = (c); \
10922 int16x4_t b_ = (b); \
10923 int32x4_t a_ = (a); \
10924 int32x4_t result; \
10925 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
10926 : "=w"(result) \
10927 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10928 : /* No clobbers */); \
10929 result; \
10932 #define vmlsl_laneq_s32(a, b, c, d) \
10933 __extension__ \
10934 ({ \
10935 int32x4_t c_ = (c); \
10936 int32x2_t b_ = (b); \
10937 int64x2_t a_ = (a); \
10938 int64x2_t result; \
10939 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
10940 : "=w"(result) \
10941 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10942 : /* No clobbers */); \
10943 result; \
10946 #define vmlsl_laneq_u16(a, b, c, d) \
10947 __extension__ \
10948 ({ \
10949 uint16x8_t c_ = (c); \
10950 uint16x4_t b_ = (b); \
10951 uint32x4_t a_ = (a); \
10952 uint32x4_t result; \
10953 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
10954 : "=w"(result) \
10955 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10956 : /* No clobbers */); \
10957 result; \
10960 #define vmlsl_laneq_u32(a, b, c, d) \
10961 __extension__ \
10962 ({ \
10963 uint32x4_t c_ = (c); \
10964 uint32x2_t b_ = (b); \
10965 uint64x2_t a_ = (a); \
10966 uint64x2_t result; \
10967 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
10968 : "=w"(result) \
10969 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
10970 : /* No clobbers */); \
10971 result; \
10974 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10975 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
10977 int32x4_t result;
10978 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
10979 : "=w"(result)
10980 : "0"(a), "w"(b), "w"(c)
10981 : /* No clobbers */);
10982 return result;
10985 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10986 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
10988 int64x2_t result;
10989 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
10990 : "=w"(result)
10991 : "0"(a), "w"(b), "w"(c)
10992 : /* No clobbers */);
10993 return result;
10996 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10997 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
10999 uint32x4_t result;
11000 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
11001 : "=w"(result)
11002 : "0"(a), "w"(b), "w"(c)
11003 : /* No clobbers */);
11004 return result;
11007 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11008 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
11010 uint64x2_t result;
11011 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
11012 : "=w"(result)
11013 : "0"(a), "w"(b), "w"(c)
11014 : /* No clobbers */);
11015 return result;
11018 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11019 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
11021 int16x8_t result;
11022 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
11023 : "=w"(result)
11024 : "0"(a), "w"(b), "w"(c)
11025 : /* No clobbers */);
11026 return result;
11029 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11030 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
11032 int32x4_t result;
11033 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
11034 : "=w"(result)
11035 : "0"(a), "w"(b), "w"(c)
11036 : /* No clobbers */);
11037 return result;
11040 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11041 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
11043 int64x2_t result;
11044 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
11045 : "=w"(result)
11046 : "0"(a), "w"(b), "w"(c)
11047 : /* No clobbers */);
11048 return result;
11051 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11052 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
11054 uint16x8_t result;
11055 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
11056 : "=w"(result)
11057 : "0"(a), "w"(b), "w"(c)
11058 : /* No clobbers */);
11059 return result;
11062 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11063 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
11065 uint32x4_t result;
11066 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
11067 : "=w"(result)
11068 : "0"(a), "w"(b), "w"(c)
11069 : /* No clobbers */);
11070 return result;
11073 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11074 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
11076 uint64x2_t result;
11077 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
11078 : "=w"(result)
11079 : "0"(a), "w"(b), "w"(c)
11080 : /* No clobbers */);
11081 return result;
/* Quad multiply-subtract by lane (vmlsq_lane): A - B * C[D], lanewise.
   Implemented as statement-expression macros so the lane number D can be
   emitted as an immediate ("i" constraint).
   NOTE(review): the lane-vector argument C is coerced to the full 128-bit
   type here (e.g. float32x4_t) even though the ACLE "_lane" (as opposed
   to "_laneq") forms take a 64-bit vector — confirm against the intended
   ACLE signatures.
   The f32 form has no fused multiply-subtract-by-lane pattern here; it is
   composed from an FMUL into a scratch register followed by an FSUB.  */

#define vmlsq_lane_f32(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       float32x4_t c_ = (c);                                    \
       float32x4_t b_ = (b);                                    \
       float32x4_t a_ = (a);                                    \
       float32x4_t result;                                      \
       float32x4_t t1;                                          \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(result), "=w"(t1)                        \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsq_lane_s16(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       int16x8_t c_ = (c);                                      \
       int16x8_t b_ = (b);                                      \
       int16x8_t a_ = (a);                                      \
       int16x8_t result;                                        \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]"                      \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsq_lane_s32(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       int32x4_t c_ = (c);                                      \
       int32x4_t b_ = (b);                                      \
       int32x4_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]"                      \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsq_lane_u16(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t c_ = (c);                                     \
       uint16x8_t b_ = (b);                                     \
       uint16x8_t a_ = (a);                                     \
       uint16x8_t result;                                       \
       __asm__ ("mls %0.8h,%2.8h,%3.h[%4]"                      \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmlsq_lane_u32(a, b, c, d)                              \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t c_ = (c);                                     \
       uint32x4_t b_ = (b);                                     \
       uint32x4_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("mls %0.4s,%2.4s,%3.s[%4]"                      \
                : "=w"(result)                                  \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)             \
                : /* No clobbers */);                           \
       result;                                                  \
     })
/* Quad multiply-subtract by lane of a 128-bit vector (vmlsq_laneq):
   A - B * C[D].  These use the reserved "__"-prefixed identifiers for
   their statement-expression temporaries.  */

#define vmlsq_laneq_f32(__a, __b, __c, __d)                     \
  __extension__                                                 \
    ({                                                          \
       float32x4_t __c_ = (__c);                                \
       float32x4_t __b_ = (__b);                                \
       float32x4_t __a_ = (__a);                                \
       float32x4_t __result;                                    \
       float32x4_t __t1;                                        \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
                : "=w"(__result), "=w"(__t1)                    \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)     \
                : /* No clobbers */);                           \
       __result;                                                \
     })

#define vmlsq_laneq_s16(__a, __b, __c, __d)                     \
  __extension__                                                 \
    ({                                                          \
       int16x8_t __c_ = (__c);                                  \
       int16x8_t __b_ = (__b);                                  \
       int16x8_t __a_ = (__a);                                  \
       int16x8_t __result;                                      \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]"                    \
                : "=w"(__result)                                \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)     \
                : /* No clobbers */);                           \
       __result;                                                \
     })

#define vmlsq_laneq_s32(__a, __b, __c, __d)                     \
  __extension__                                                 \
    ({                                                          \
       int32x4_t __c_ = (__c);                                  \
       int32x4_t __b_ = (__b);                                  \
       int32x4_t __a_ = (__a);                                  \
       int32x4_t __result;                                      \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]"                    \
                : "=w"(__result)                                \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)     \
                : /* No clobbers */);                           \
       __result;                                                \
     })

#define vmlsq_laneq_u16(__a, __b, __c, __d)                     \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t __c_ = (__c);                                 \
       uint16x8_t __b_ = (__b);                                 \
       uint16x8_t __a_ = (__a);                                 \
       uint16x8_t __result;                                     \
       __asm__ ("mls %0.8h, %2.8h, %3.h[%4]"                    \
                : "=w"(__result)                                \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)     \
                : /* No clobbers */);                           \
       __result;                                                \
     })

#define vmlsq_laneq_u32(__a, __b, __c, __d)                     \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t __c_ = (__c);                                 \
       uint32x4_t __b_ = (__b);                                 \
       uint32x4_t __a_ = (__a);                                 \
       uint32x4_t __result;                                     \
       __asm__ ("mls %0.4s, %2.4s, %3.s[%4]"                    \
                : "=w"(__result)                                \
                : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d)     \
                : /* No clobbers */);                           \
       __result;                                                \
     })
11226 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11227 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
11229 float32x4_t result;
11230 float32x4_t t1;
11231 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
11232 : "=w"(result), "=w"(t1)
11233 : "0"(a), "w"(b), "w"(c)
11234 : /* No clobbers */);
11235 return result;
11238 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11239 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
11241 float64x2_t result;
11242 float64x2_t t1;
11243 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
11244 : "=w"(result), "=w"(t1)
11245 : "0"(a), "w"(b), "w"(c)
11246 : /* No clobbers */);
11247 return result;
11250 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11251 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
11253 int16x8_t result;
11254 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
11255 : "=w"(result)
11256 : "0"(a), "w"(b), "w"(c)
11257 : /* No clobbers */);
11258 return result;
11261 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11262 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
11264 int32x4_t result;
11265 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
11266 : "=w"(result)
11267 : "0"(a), "w"(b), "w"(c)
11268 : /* No clobbers */);
11269 return result;
11272 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11273 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
11275 uint16x8_t result;
11276 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
11277 : "=w"(result)
11278 : "0"(a), "w"(b), "w"(c)
11279 : /* No clobbers */);
11280 return result;
11283 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11284 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
11286 uint32x4_t result;
11287 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
11288 : "=w"(result)
11289 : "0"(a), "w"(b), "w"(c)
11290 : /* No clobbers */);
11291 return result;
11294 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11295 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
11297 int8x16_t result;
11298 __asm__ ("mls %0.16b,%2.16b,%3.16b"
11299 : "=w"(result)
11300 : "0"(a), "w"(b), "w"(c)
11301 : /* No clobbers */);
11302 return result;
11305 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11306 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
11308 int16x8_t result;
11309 __asm__ ("mls %0.8h,%2.8h,%3.8h"
11310 : "=w"(result)
11311 : "0"(a), "w"(b), "w"(c)
11312 : /* No clobbers */);
11313 return result;
11316 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11317 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
11319 int32x4_t result;
11320 __asm__ ("mls %0.4s,%2.4s,%3.4s"
11321 : "=w"(result)
11322 : "0"(a), "w"(b), "w"(c)
11323 : /* No clobbers */);
11324 return result;
11327 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11328 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
11330 uint8x16_t result;
11331 __asm__ ("mls %0.16b,%2.16b,%3.16b"
11332 : "=w"(result)
11333 : "0"(a), "w"(b), "w"(c)
11334 : /* No clobbers */);
11335 return result;
11338 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11339 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
11341 uint16x8_t result;
11342 __asm__ ("mls %0.8h,%2.8h,%3.8h"
11343 : "=w"(result)
11344 : "0"(a), "w"(b), "w"(c)
11345 : /* No clobbers */);
11346 return result;
11349 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11350 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
11352 uint32x4_t result;
11353 __asm__ ("mls %0.4s,%2.4s,%3.4s"
11354 : "=w"(result)
11355 : "0"(a), "w"(b), "w"(c)
11356 : /* No clobbers */);
11357 return result;
11360 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11361 vmov_n_f32 (float32_t a)
11363 float32x2_t result;
11364 __asm__ ("dup %0.2s, %w1"
11365 : "=w"(result)
11366 : "r"(a)
11367 : /* No clobbers */);
11368 return result;
11371 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
11372 vmov_n_p8 (uint32_t a)
11374 poly8x8_t result;
11375 __asm__ ("dup %0.8b,%w1"
11376 : "=w"(result)
11377 : "r"(a)
11378 : /* No clobbers */);
11379 return result;
11382 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
11383 vmov_n_p16 (uint32_t a)
11385 poly16x4_t result;
11386 __asm__ ("dup %0.4h,%w1"
11387 : "=w"(result)
11388 : "r"(a)
11389 : /* No clobbers */);
11390 return result;
11393 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11394 vmov_n_s8 (int32_t a)
11396 int8x8_t result;
11397 __asm__ ("dup %0.8b,%w1"
11398 : "=w"(result)
11399 : "r"(a)
11400 : /* No clobbers */);
11401 return result;
11404 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11405 vmov_n_s16 (int32_t a)
11407 int16x4_t result;
11408 __asm__ ("dup %0.4h,%w1"
11409 : "=w"(result)
11410 : "r"(a)
11411 : /* No clobbers */);
11412 return result;
11415 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11416 vmov_n_s32 (int32_t a)
11418 int32x2_t result;
11419 __asm__ ("dup %0.2s,%w1"
11420 : "=w"(result)
11421 : "r"(a)
11422 : /* No clobbers */);
11423 return result;
11426 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
11427 vmov_n_s64 (int64_t a)
11429 int64x1_t result;
11430 __asm__ ("ins %0.d[0],%x1"
11431 : "=w"(result)
11432 : "r"(a)
11433 : /* No clobbers */);
11434 return result;
11437 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11438 vmov_n_u8 (uint32_t a)
11440 uint8x8_t result;
11441 __asm__ ("dup %0.8b,%w1"
11442 : "=w"(result)
11443 : "r"(a)
11444 : /* No clobbers */);
11445 return result;
11448 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11449 vmov_n_u16 (uint32_t a)
11451 uint16x4_t result;
11452 __asm__ ("dup %0.4h,%w1"
11453 : "=w"(result)
11454 : "r"(a)
11455 : /* No clobbers */);
11456 return result;
11459 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11460 vmov_n_u32 (uint32_t a)
11462 uint32x2_t result;
11463 __asm__ ("dup %0.2s,%w1"
11464 : "=w"(result)
11465 : "r"(a)
11466 : /* No clobbers */);
11467 return result;
11470 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11471 vmov_n_u64 (uint64_t a)
11473 uint64x1_t result;
11474 __asm__ ("ins %0.d[0],%x1"
11475 : "=w"(result)
11476 : "r"(a)
11477 : /* No clobbers */);
11478 return result;
11481 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11482 vmovl_high_s8 (int8x16_t a)
11484 int16x8_t result;
11485 __asm__ ("sshll2 %0.8h,%1.16b,#0"
11486 : "=w"(result)
11487 : "w"(a)
11488 : /* No clobbers */);
11489 return result;
11492 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11493 vmovl_high_s16 (int16x8_t a)
11495 int32x4_t result;
11496 __asm__ ("sshll2 %0.4s,%1.8h,#0"
11497 : "=w"(result)
11498 : "w"(a)
11499 : /* No clobbers */);
11500 return result;
11503 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11504 vmovl_high_s32 (int32x4_t a)
11506 int64x2_t result;
11507 __asm__ ("sshll2 %0.2d,%1.4s,#0"
11508 : "=w"(result)
11509 : "w"(a)
11510 : /* No clobbers */);
11511 return result;
11514 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11515 vmovl_high_u8 (uint8x16_t a)
11517 uint16x8_t result;
11518 __asm__ ("ushll2 %0.8h,%1.16b,#0"
11519 : "=w"(result)
11520 : "w"(a)
11521 : /* No clobbers */);
11522 return result;
11525 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11526 vmovl_high_u16 (uint16x8_t a)
11528 uint32x4_t result;
11529 __asm__ ("ushll2 %0.4s,%1.8h,#0"
11530 : "=w"(result)
11531 : "w"(a)
11532 : /* No clobbers */);
11533 return result;
11536 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11537 vmovl_high_u32 (uint32x4_t a)
11539 uint64x2_t result;
11540 __asm__ ("ushll2 %0.2d,%1.4s,#0"
11541 : "=w"(result)
11542 : "w"(a)
11543 : /* No clobbers */);
11544 return result;
11547 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11548 vmovl_s8 (int8x8_t a)
11550 int16x8_t result;
11551 __asm__ ("sshll %0.8h,%1.8b,#0"
11552 : "=w"(result)
11553 : "w"(a)
11554 : /* No clobbers */);
11555 return result;
11558 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11559 vmovl_s16 (int16x4_t a)
11561 int32x4_t result;
11562 __asm__ ("sshll %0.4s,%1.4h,#0"
11563 : "=w"(result)
11564 : "w"(a)
11565 : /* No clobbers */);
11566 return result;
11569 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11570 vmovl_s32 (int32x2_t a)
11572 int64x2_t result;
11573 __asm__ ("sshll %0.2d,%1.2s,#0"
11574 : "=w"(result)
11575 : "w"(a)
11576 : /* No clobbers */);
11577 return result;
11580 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11581 vmovl_u8 (uint8x8_t a)
11583 uint16x8_t result;
11584 __asm__ ("ushll %0.8h,%1.8b,#0"
11585 : "=w"(result)
11586 : "w"(a)
11587 : /* No clobbers */);
11588 return result;
11591 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11592 vmovl_u16 (uint16x4_t a)
11594 uint32x4_t result;
11595 __asm__ ("ushll %0.4s,%1.4h,#0"
11596 : "=w"(result)
11597 : "w"(a)
11598 : /* No clobbers */);
11599 return result;
11602 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11603 vmovl_u32 (uint32x2_t a)
11605 uint64x2_t result;
11606 __asm__ ("ushll %0.2d,%1.2s,#0"
11607 : "=w"(result)
11608 : "w"(a)
11609 : /* No clobbers */);
11610 return result;
11613 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11614 vmovn_high_s16 (int8x8_t a, int16x8_t b)
11616 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
11617 __asm__ ("xtn2 %0.16b,%1.8h"
11618 : "+w"(result)
11619 : "w"(b)
11620 : /* No clobbers */);
11621 return result;
11624 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11625 vmovn_high_s32 (int16x4_t a, int32x4_t b)
11627 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
11628 __asm__ ("xtn2 %0.8h,%1.4s"
11629 : "+w"(result)
11630 : "w"(b)
11631 : /* No clobbers */);
11632 return result;
11635 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11636 vmovn_high_s64 (int32x2_t a, int64x2_t b)
11638 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
11639 __asm__ ("xtn2 %0.4s,%1.2d"
11640 : "+w"(result)
11641 : "w"(b)
11642 : /* No clobbers */);
11643 return result;
11646 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11647 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
11649 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
11650 __asm__ ("xtn2 %0.16b,%1.8h"
11651 : "+w"(result)
11652 : "w"(b)
11653 : /* No clobbers */);
11654 return result;
11657 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11658 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
11660 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
11661 __asm__ ("xtn2 %0.8h,%1.4s"
11662 : "+w"(result)
11663 : "w"(b)
11664 : /* No clobbers */);
11665 return result;
11668 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11669 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
11671 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
11672 __asm__ ("xtn2 %0.4s,%1.2d"
11673 : "+w"(result)
11674 : "w"(b)
11675 : /* No clobbers */);
11676 return result;
11679 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11680 vmovn_s16 (int16x8_t a)
11682 int8x8_t result;
11683 __asm__ ("xtn %0.8b,%1.8h"
11684 : "=w"(result)
11685 : "w"(a)
11686 : /* No clobbers */);
11687 return result;
11690 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11691 vmovn_s32 (int32x4_t a)
11693 int16x4_t result;
11694 __asm__ ("xtn %0.4h,%1.4s"
11695 : "=w"(result)
11696 : "w"(a)
11697 : /* No clobbers */);
11698 return result;
11701 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11702 vmovn_s64 (int64x2_t a)
11704 int32x2_t result;
11705 __asm__ ("xtn %0.2s,%1.2d"
11706 : "=w"(result)
11707 : "w"(a)
11708 : /* No clobbers */);
11709 return result;
11712 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11713 vmovn_u16 (uint16x8_t a)
11715 uint8x8_t result;
11716 __asm__ ("xtn %0.8b,%1.8h"
11717 : "=w"(result)
11718 : "w"(a)
11719 : /* No clobbers */);
11720 return result;
11723 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11724 vmovn_u32 (uint32x4_t a)
11726 uint16x4_t result;
11727 __asm__ ("xtn %0.4h,%1.4s"
11728 : "=w"(result)
11729 : "w"(a)
11730 : /* No clobbers */);
11731 return result;
11734 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11735 vmovn_u64 (uint64x2_t a)
11737 uint32x2_t result;
11738 __asm__ ("xtn %0.2s,%1.2d"
11739 : "=w"(result)
11740 : "w"(a)
11741 : /* No clobbers */);
11742 return result;
11745 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11746 vmovq_n_f32 (float32_t a)
11748 float32x4_t result;
11749 __asm__ ("dup %0.4s, %w1"
11750 : "=w"(result)
11751 : "r"(a)
11752 : /* No clobbers */);
11753 return result;
11756 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11757 vmovq_n_f64 (float64_t a)
11759 return (float64x2_t) {a, a};
11762 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11763 vmovq_n_p8 (uint32_t a)
11765 poly8x16_t result;
11766 __asm__ ("dup %0.16b,%w1"
11767 : "=w"(result)
11768 : "r"(a)
11769 : /* No clobbers */);
11770 return result;
11773 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
11774 vmovq_n_p16 (uint32_t a)
11776 poly16x8_t result;
11777 __asm__ ("dup %0.8h,%w1"
11778 : "=w"(result)
11779 : "r"(a)
11780 : /* No clobbers */);
11781 return result;
11784 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11785 vmovq_n_s8 (int32_t a)
11787 int8x16_t result;
11788 __asm__ ("dup %0.16b,%w1"
11789 : "=w"(result)
11790 : "r"(a)
11791 : /* No clobbers */);
11792 return result;
11795 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11796 vmovq_n_s16 (int32_t a)
11798 int16x8_t result;
11799 __asm__ ("dup %0.8h,%w1"
11800 : "=w"(result)
11801 : "r"(a)
11802 : /* No clobbers */);
11803 return result;
11806 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11807 vmovq_n_s32 (int32_t a)
11809 int32x4_t result;
11810 __asm__ ("dup %0.4s,%w1"
11811 : "=w"(result)
11812 : "r"(a)
11813 : /* No clobbers */);
11814 return result;
11817 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11818 vmovq_n_s64 (int64_t a)
11820 int64x2_t result;
11821 __asm__ ("dup %0.2d,%x1"
11822 : "=w"(result)
11823 : "r"(a)
11824 : /* No clobbers */);
11825 return result;
11828 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11829 vmovq_n_u8 (uint32_t a)
11831 uint8x16_t result;
11832 __asm__ ("dup %0.16b,%w1"
11833 : "=w"(result)
11834 : "r"(a)
11835 : /* No clobbers */);
11836 return result;
11839 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11840 vmovq_n_u16 (uint32_t a)
11842 uint16x8_t result;
11843 __asm__ ("dup %0.8h,%w1"
11844 : "=w"(result)
11845 : "r"(a)
11846 : /* No clobbers */);
11847 return result;
11850 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11851 vmovq_n_u32 (uint32_t a)
11853 uint32x4_t result;
11854 __asm__ ("dup %0.4s,%w1"
11855 : "=w"(result)
11856 : "r"(a)
11857 : /* No clobbers */);
11858 return result;
11861 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11862 vmovq_n_u64 (uint64_t a)
11864 uint64x2_t result;
11865 __asm__ ("dup %0.2d,%x1"
11866 : "=w"(result)
11867 : "r"(a)
11868 : /* No clobbers */);
11869 return result;
/* Multiply by lane (vmul_lane): A * B[C], lanewise, on 64-bit vectors.
   Macros so the lane index C can be emitted as an immediate.  */

#define vmul_lane_f32(a, b, c)                                  \
  __extension__                                                 \
    ({                                                          \
       float32x2_t b_ = (b);                                    \
       float32x2_t a_ = (a);                                    \
       float32x2_t result;                                      \
       __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]"                     \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_lane_s16(a, b, c)                                  \
  __extension__                                                 \
    ({                                                          \
       int16x4_t b_ = (b);                                      \
       int16x4_t a_ = (a);                                      \
       int16x4_t result;                                        \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]"                      \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_lane_s32(a, b, c)                                  \
  __extension__                                                 \
    ({                                                          \
       int32x2_t b_ = (b);                                      \
       int32x2_t a_ = (a);                                      \
       int32x2_t result;                                        \
       __asm__ ("mul %0.2s,%1.2s,%2.s[%3]"                      \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_lane_u16(a, b, c)                                  \
  __extension__                                                 \
    ({                                                          \
       uint16x4_t b_ = (b);                                     \
       uint16x4_t a_ = (a);                                     \
       uint16x4_t result;                                       \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]"                      \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_lane_u32(a, b, c)                                  \
  __extension__                                                 \
    ({                                                          \
       uint32x2_t b_ = (b);                                     \
       uint32x2_t a_ = (a);                                     \
       uint32x2_t result;                                       \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })
/* Multiply by lane of a 128-bit vector (vmul_laneq): A * B[C], where A
   and the result are 64-bit vectors and B is the full 128-bit vector.  */

#define vmul_laneq_f32(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       float32x4_t b_ = (b);                                    \
       float32x2_t a_ = (a);                                    \
       float32x2_t result;                                      \
       __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]"                   \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_laneq_s16(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       int16x8_t b_ = (b);                                      \
       int16x4_t a_ = (a);                                      \
       int16x4_t result;                                        \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_laneq_s32(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       int32x4_t b_ = (b);                                      \
       int32x2_t a_ = (a);                                      \
       int32x2_t result;                                        \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_laneq_u16(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t b_ = (b);                                     \
       uint16x4_t a_ = (a);                                     \
       uint16x4_t result;                                       \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmul_laneq_u32(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t b_ = (b);                                     \
       uint32x2_t a_ = (a);                                     \
       uint32x2_t result;                                       \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })
12002 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12003 vmul_n_f32 (float32x2_t a, float32_t b)
12005 float32x2_t result;
12006 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
12007 : "=w"(result)
12008 : "w"(a), "w"(b)
12009 : /* No clobbers */);
12010 return result;
12013 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12014 vmul_n_s16 (int16x4_t a, int16_t b)
12016 int16x4_t result;
12017 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
12018 : "=w"(result)
12019 : "w"(a), "w"(b)
12020 : /* No clobbers */);
12021 return result;
12024 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12025 vmul_n_s32 (int32x2_t a, int32_t b)
12027 int32x2_t result;
12028 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
12029 : "=w"(result)
12030 : "w"(a), "w"(b)
12031 : /* No clobbers */);
12032 return result;
12035 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12036 vmul_n_u16 (uint16x4_t a, uint16_t b)
12038 uint16x4_t result;
12039 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
12040 : "=w"(result)
12041 : "w"(a), "w"(b)
12042 : /* No clobbers */);
12043 return result;
12046 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12047 vmul_n_u32 (uint32x2_t a, uint32_t b)
12049 uint32x2_t result;
12050 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
12051 : "=w"(result)
12052 : "w"(a), "w"(b)
12053 : /* No clobbers */);
12054 return result;
/* Scalar double multiply by lane (vmuld_lane_f64): A * B[C], producing a
   scalar FP result via FMUL (by-element, D-register form).  */

#define vmuld_lane_f64(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       float64x2_t b_ = (b);                                    \
       float64_t a_ = (a);                                      \
       float64_t result;                                        \
       __asm__ ("fmul %d0,%d1,%2.d[%3]"                         \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })
/* Widening multiply-long of the high half by lane (vmull_high_lane):
   widened product of the upper lanes of A with lane C of B, via
   SMULL2/UMULL2 by-element.
   NOTE(review): B is coerced to the 128-bit type here, the same shape
   used by the "_laneq" forms — confirm against the intended ACLE
   signatures, which give "_lane" a 64-bit lane vector.  */

#define vmull_high_lane_s16(a, b, c)                            \
  __extension__                                                 \
    ({                                                          \
       int16x8_t b_ = (b);                                      \
       int16x8_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_high_lane_s32(a, b, c)                            \
  __extension__                                                 \
    ({                                                          \
       int32x4_t b_ = (b);                                      \
       int32x4_t a_ = (a);                                      \
       int64x2_t result;                                        \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_high_lane_u16(a, b, c)                            \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t b_ = (b);                                     \
       uint16x8_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_high_lane_u32(a, b, c)                            \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t b_ = (b);                                     \
       uint32x4_t a_ = (a);                                     \
       uint64x2_t result;                                       \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })
/* Widening multiply-long of the high half by lane of a 128-bit vector
   (vmull_high_laneq): upper lanes of A times lane C of B, via
   SMULL2/UMULL2 by-element.  */

#define vmull_high_laneq_s16(a, b, c)                           \
  __extension__                                                 \
    ({                                                          \
       int16x8_t b_ = (b);                                      \
       int16x8_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_high_laneq_s32(a, b, c)                           \
  __extension__                                                 \
    ({                                                          \
       int32x4_t b_ = (b);                                      \
       int32x4_t a_ = (a);                                      \
       int64x2_t result;                                        \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_high_laneq_u16(a, b, c)                           \
  __extension__                                                 \
    ({                                                          \
       uint16x8_t b_ = (b);                                     \
       uint16x8_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_high_laneq_u32(a, b, c)                           \
  __extension__                                                 \
    ({                                                          \
       uint32x4_t b_ = (b);                                     \
       uint32x4_t a_ = (a);                                     \
       uint64x2_t result;                                       \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                 \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })
12174 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12175 vmull_high_n_s16 (int16x8_t a, int16_t b)
12177 int32x4_t result;
12178 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
12179 : "=w"(result)
12180 : "w"(a), "w"(b)
12181 : /* No clobbers */);
12182 return result;
12185 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12186 vmull_high_n_s32 (int32x4_t a, int32_t b)
12188 int64x2_t result;
12189 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
12190 : "=w"(result)
12191 : "w"(a), "w"(b)
12192 : /* No clobbers */);
12193 return result;
12196 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12197 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
12199 uint32x4_t result;
12200 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
12201 : "=w"(result)
12202 : "w"(a), "w"(b)
12203 : /* No clobbers */);
12204 return result;
12207 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12208 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
12210 uint64x2_t result;
12211 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
12212 : "=w"(result)
12213 : "w"(a), "w"(b)
12214 : /* No clobbers */);
12215 return result;
12218 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12219 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
12221 poly16x8_t result;
12222 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
12223 : "=w"(result)
12224 : "w"(a), "w"(b)
12225 : /* No clobbers */);
12226 return result;
12229 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12230 vmull_high_s8 (int8x16_t a, int8x16_t b)
12232 int16x8_t result;
12233 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
12234 : "=w"(result)
12235 : "w"(a), "w"(b)
12236 : /* No clobbers */);
12237 return result;
12240 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12241 vmull_high_s16 (int16x8_t a, int16x8_t b)
12243 int32x4_t result;
12244 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
12245 : "=w"(result)
12246 : "w"(a), "w"(b)
12247 : /* No clobbers */);
12248 return result;
12251 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12252 vmull_high_s32 (int32x4_t a, int32x4_t b)
12254 int64x2_t result;
12255 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
12256 : "=w"(result)
12257 : "w"(a), "w"(b)
12258 : /* No clobbers */);
12259 return result;
12262 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12263 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
12265 uint16x8_t result;
12266 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
12267 : "=w"(result)
12268 : "w"(a), "w"(b)
12269 : /* No clobbers */);
12270 return result;
12273 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12274 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
12276 uint32x4_t result;
12277 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
12278 : "=w"(result)
12279 : "w"(a), "w"(b)
12280 : /* No clobbers */);
12281 return result;
12284 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12285 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
12287 uint64x2_t result;
12288 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
12289 : "=w"(result)
12290 : "w"(a), "w"(b)
12291 : /* No clobbers */);
12292 return result;
/* Widening multiply-long by lane (vmull_lane): widened product of the
   64-bit vector A with lane C of the 64-bit vector B, via
   SMULL/UMULL by-element.  */

#define vmull_lane_s16(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       int16x4_t b_ = (b);                                      \
       int16x4_t a_ = (a);                                      \
       int32x4_t result;                                        \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_lane_s32(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       int32x2_t b_ = (b);                                      \
       int32x2_t a_ = (a);                                      \
       int64x2_t result;                                        \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_lane_u16(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       uint16x4_t b_ = (b);                                     \
       uint16x4_t a_ = (a);                                     \
       uint32x4_t result;                                       \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                    \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })

#define vmull_lane_u32(a, b, c)                                 \
  __extension__                                                 \
    ({                                                          \
       uint32x2_t b_ = (b);                                     \
       uint32x2_t a_ = (a);                                     \
       uint64x2_t result;                                       \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                  \
                : "=w"(result)                                  \
                : "w"(a_), "w"(b_), "i"(c)                      \
                : /* No clobbers */);                           \
       result;                                                  \
     })
12347 #define vmull_laneq_s16(a, b, c) \
12348 __extension__ \
12349 ({ \
12350 int16x8_t b_ = (b); \
12351 int16x4_t a_ = (a); \
12352 int32x4_t result; \
12353 __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \
12354 : "=w"(result) \
12355 : "w"(a_), "w"(b_), "i"(c) \
12356 : /* No clobbers */); \
12357 result; \
12360 #define vmull_laneq_s32(a, b, c) \
12361 __extension__ \
12362 ({ \
12363 int32x4_t b_ = (b); \
12364 int32x2_t a_ = (a); \
12365 int64x2_t result; \
12366 __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \
12367 : "=w"(result) \
12368 : "w"(a_), "w"(b_), "i"(c) \
12369 : /* No clobbers */); \
12370 result; \
12373 #define vmull_laneq_u16(a, b, c) \
12374 __extension__ \
12375 ({ \
12376 uint16x8_t b_ = (b); \
12377 uint16x4_t a_ = (a); \
12378 uint32x4_t result; \
12379 __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \
12380 : "=w"(result) \
12381 : "w"(a_), "w"(b_), "i"(c) \
12382 : /* No clobbers */); \
12383 result; \
12386 #define vmull_laneq_u32(a, b, c) \
12387 __extension__ \
12388 ({ \
12389 uint32x4_t b_ = (b); \
12390 uint32x2_t a_ = (a); \
12391 uint64x2_t result; \
12392 __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \
12393 : "=w"(result) \
12394 : "w"(a_), "w"(b_), "i"(c) \
12395 : /* No clobbers */); \
12396 result; \
/* vmull_n_<type>: widening multiply of a vector by a scalar.  The scalar
   argument "b" is handed to the asm in a SIMD register ("w" constraint)
   and the instruction selects its element [0] — so only the low element
   of whatever register the compiler chooses is used.  */
12399 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12400 vmull_n_s16 (int16x4_t a, int16_t b)
12402 int32x4_t result;
12403 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
12404 : "=w"(result)
12405 : "w"(a), "w"(b)
12406 : /* No clobbers */);
12407 return result;
12410 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12411 vmull_n_s32 (int32x2_t a, int32_t b)
12413 int64x2_t result;
12414 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
12415 : "=w"(result)
12416 : "w"(a), "w"(b)
12417 : /* No clobbers */);
12418 return result;
12421 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12422 vmull_n_u16 (uint16x4_t a, uint16_t b)
12424 uint32x4_t result;
12425 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
12426 : "=w"(result)
12427 : "w"(a), "w"(b)
12428 : /* No clobbers */);
12429 return result;
12432 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12433 vmull_n_u32 (uint32x2_t a, uint32_t b)
12435 uint64x2_t result;
12436 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
12437 : "=w"(result)
12438 : "w"(a), "w"(b)
12439 : /* No clobbers */);
12440 return result;
/* vmull_<type>: full widening multiplies of two 64-bit vectors, producing
   a 128-bit result with double-width elements.  Polynomial inputs emit
   PMULL; signed inputs SMULL; unsigned inputs UMULL.  All operands use
   the "w" (SIMD register) constraint.  */
12443 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12444 vmull_p8 (poly8x8_t a, poly8x8_t b)
12446 poly16x8_t result;
12447 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
12448 : "=w"(result)
12449 : "w"(a), "w"(b)
12450 : /* No clobbers */);
12451 return result;
12454 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12455 vmull_s8 (int8x8_t a, int8x8_t b)
12457 int16x8_t result;
12458 __asm__ ("smull %0.8h, %1.8b, %2.8b"
12459 : "=w"(result)
12460 : "w"(a), "w"(b)
12461 : /* No clobbers */);
12462 return result;
12465 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12466 vmull_s16 (int16x4_t a, int16x4_t b)
12468 int32x4_t result;
12469 __asm__ ("smull %0.4s, %1.4h, %2.4h"
12470 : "=w"(result)
12471 : "w"(a), "w"(b)
12472 : /* No clobbers */);
12473 return result;
12476 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
12477 vmull_s32 (int32x2_t a, int32x2_t b)
12479 int64x2_t result;
12480 __asm__ ("smull %0.2d, %1.2s, %2.2s"
12481 : "=w"(result)
12482 : "w"(a), "w"(b)
12483 : /* No clobbers */);
12484 return result;
12487 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12488 vmull_u8 (uint8x8_t a, uint8x8_t b)
12490 uint16x8_t result;
12491 __asm__ ("umull %0.8h, %1.8b, %2.8b"
12492 : "=w"(result)
12493 : "w"(a), "w"(b)
12494 : /* No clobbers */);
12495 return result;
12498 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12499 vmull_u16 (uint16x4_t a, uint16x4_t b)
12501 uint32x4_t result;
12502 __asm__ ("umull %0.4s, %1.4h, %2.4h"
12503 : "=w"(result)
12504 : "w"(a), "w"(b)
12505 : /* No clobbers */);
12506 return result;
12509 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
12510 vmull_u32 (uint32x2_t a, uint32x2_t b)
12512 uint64x2_t result;
12513 __asm__ ("umull %0.2d, %1.2s, %2.2s"
12514 : "=w"(result)
12515 : "w"(a), "w"(b)
12516 : /* No clobbers */);
12517 return result;
/* vmulq_lane / vmulq_laneq: 128-bit element-wise multiply by one selected
   lane of the second operand (FMUL for float, MUL for integer).  Macros —
   not functions — so the lane index "c" can be passed as an asm immediate
   ("i" constraint).  Arguments are copied into locals (a_, b_) to avoid
   multiple evaluation.  The _lane variants take a 64-bit lane operand
   (float64x1_t is a plain double), the _laneq variants a 128-bit one.  */
12520 #define vmulq_lane_f32(a, b, c) \
12521 __extension__ \
12522 ({ \
12523 float32x2_t b_ = (b); \
12524 float32x4_t a_ = (a); \
12525 float32x4_t result; \
12526 __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
12527 : "=w"(result) \
12528 : "w"(a_), "w"(b_), "i"(c) \
12529 : /* No clobbers */); \
12530 result; \
12533 #define vmulq_lane_f64(a, b, c) \
12534 __extension__ \
12535 ({ \
12536 float64x1_t b_ = (b); \
12537 float64x2_t a_ = (a); \
12538 float64x2_t result; \
12539 __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
12540 : "=w"(result) \
12541 : "w"(a_), "w"(b_), "i"(c) \
12542 : /* No clobbers */); \
12543 result; \
12546 #define vmulq_lane_s16(a, b, c) \
12547 __extension__ \
12548 ({ \
12549 int16x4_t b_ = (b); \
12550 int16x8_t a_ = (a); \
12551 int16x8_t result; \
12552 __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
12553 : "=w"(result) \
12554 : "w"(a_), "w"(b_), "i"(c) \
12555 : /* No clobbers */); \
12556 result; \
12559 #define vmulq_lane_s32(a, b, c) \
12560 __extension__ \
12561 ({ \
12562 int32x2_t b_ = (b); \
12563 int32x4_t a_ = (a); \
12564 int32x4_t result; \
12565 __asm__ ("mul %0.4s,%1.4s,%2.s[%3]" \
12566 : "=w"(result) \
12567 : "w"(a_), "w"(b_), "i"(c) \
12568 : /* No clobbers */); \
12569 result; \
12572 #define vmulq_lane_u16(a, b, c) \
12573 __extension__ \
12574 ({ \
12575 uint16x4_t b_ = (b); \
12576 uint16x8_t a_ = (a); \
12577 uint16x8_t result; \
12578 __asm__ ("mul %0.8h,%1.8h,%2.h[%3]" \
12579 : "=w"(result) \
12580 : "w"(a_), "w"(b_), "i"(c) \
12581 : /* No clobbers */); \
12582 result; \
12585 #define vmulq_lane_u32(a, b, c) \
12586 __extension__ \
12587 ({ \
12588 uint32x2_t b_ = (b); \
12589 uint32x4_t a_ = (a); \
12590 uint32x4_t result; \
12591 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
12592 : "=w"(result) \
12593 : "w"(a_), "w"(b_), "i"(c) \
12594 : /* No clobbers */); \
12595 result; \
12598 #define vmulq_laneq_f32(a, b, c) \
12599 __extension__ \
12600 ({ \
12601 float32x4_t b_ = (b); \
12602 float32x4_t a_ = (a); \
12603 float32x4_t result; \
12604 __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]" \
12605 : "=w"(result) \
12606 : "w"(a_), "w"(b_), "i"(c) \
12607 : /* No clobbers */); \
12608 result; \
12611 #define vmulq_laneq_f64(a, b, c) \
12612 __extension__ \
12613 ({ \
12614 float64x2_t b_ = (b); \
12615 float64x2_t a_ = (a); \
12616 float64x2_t result; \
12617 __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]" \
12618 : "=w"(result) \
12619 : "w"(a_), "w"(b_), "i"(c) \
12620 : /* No clobbers */); \
12621 result; \
12624 #define vmulq_laneq_s16(a, b, c) \
12625 __extension__ \
12626 ({ \
12627 int16x8_t b_ = (b); \
12628 int16x8_t a_ = (a); \
12629 int16x8_t result; \
12630 __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
12631 : "=w"(result) \
12632 : "w"(a_), "w"(b_), "i"(c) \
12633 : /* No clobbers */); \
12634 result; \
12637 #define vmulq_laneq_s32(a, b, c) \
12638 __extension__ \
12639 ({ \
12640 int32x4_t b_ = (b); \
12641 int32x4_t a_ = (a); \
12642 int32x4_t result; \
12643 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
12644 : "=w"(result) \
12645 : "w"(a_), "w"(b_), "i"(c) \
12646 : /* No clobbers */); \
12647 result; \
12650 #define vmulq_laneq_u16(a, b, c) \
12651 __extension__ \
12652 ({ \
12653 uint16x8_t b_ = (b); \
12654 uint16x8_t a_ = (a); \
12655 uint16x8_t result; \
12656 __asm__ ("mul %0.8h, %1.8h, %2.h[%3]" \
12657 : "=w"(result) \
12658 : "w"(a_), "w"(b_), "i"(c) \
12659 : /* No clobbers */); \
12660 result; \
12663 #define vmulq_laneq_u32(a, b, c) \
12664 __extension__ \
12665 ({ \
12666 uint32x4_t b_ = (b); \
12667 uint32x4_t a_ = (a); \
12668 uint32x4_t result; \
12669 __asm__ ("mul %0.4s, %1.4s, %2.s[%3]" \
12670 : "=w"(result) \
12671 : "w"(a_), "w"(b_), "i"(c) \
12672 : /* No clobbers */); \
12673 result; \
/* vmulq_n_<type>: multiply every element of a 128-bit vector by a scalar.
   The scalar is passed in a SIMD register ("w") and the instruction
   indexes its element [0]; FMUL for float, MUL for integer.  */
12676 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12677 vmulq_n_f32 (float32x4_t a, float32_t b)
12679 float32x4_t result;
12680 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
12681 : "=w"(result)
12682 : "w"(a), "w"(b)
12683 : /* No clobbers */);
12684 return result;
12687 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12688 vmulq_n_f64 (float64x2_t a, float64_t b)
12690 float64x2_t result;
12691 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
12692 : "=w"(result)
12693 : "w"(a), "w"(b)
12694 : /* No clobbers */);
12695 return result;
12698 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12699 vmulq_n_s16 (int16x8_t a, int16_t b)
12701 int16x8_t result;
12702 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
12703 : "=w"(result)
12704 : "w"(a), "w"(b)
12705 : /* No clobbers */);
12706 return result;
12709 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12710 vmulq_n_s32 (int32x4_t a, int32_t b)
12712 int32x4_t result;
12713 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
12714 : "=w"(result)
12715 : "w"(a), "w"(b)
12716 : /* No clobbers */);
12717 return result;
12720 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12721 vmulq_n_u16 (uint16x8_t a, uint16_t b)
12723 uint16x8_t result;
12724 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
12725 : "=w"(result)
12726 : "w"(a), "w"(b)
12727 : /* No clobbers */);
12728 return result;
12731 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12732 vmulq_n_u32 (uint32x4_t a, uint32_t b)
12734 uint32x4_t result;
12735 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
12736 : "=w"(result)
12737 : "w"(a), "w"(b)
12738 : /* No clobbers */);
12739 return result;
/* vmuls_lane_f32: scalar float multiply by one lane of a 128-bit vector
   (FMUL %s0,%s1,%2.s[c]).  Macro so the lane index "c" is an asm
   immediate; arguments are copied to locals to evaluate them once.  */
12742 #define vmuls_lane_f32(a, b, c) \
12743 __extension__ \
12744 ({ \
12745 float32x4_t b_ = (b); \
12746 float32_t a_ = (a); \
12747 float32_t result; \
12748 __asm__ ("fmul %s0,%s1,%2.s[%3]" \
12749 : "=w"(result) \
12750 : "w"(a_), "w"(b_), "i"(c) \
12751 : /* No clobbers */); \
12752 result; \
/* vmulx family: wrappers around the FMULX instruction (vector, scalar,
   and by-lane forms).  The by-lane variants are macros so the lane index
   is an asm immediate ("i"); all others are always-inline functions with
   operands in SIMD registers ("w").  */
12755 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12756 vmulx_f32 (float32x2_t a, float32x2_t b)
12758 float32x2_t result;
12759 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
12760 : "=w"(result)
12761 : "w"(a), "w"(b)
12762 : /* No clobbers */);
12763 return result;
12766 #define vmulx_lane_f32(a, b, c) \
12767 __extension__ \
12768 ({ \
12769 float32x4_t b_ = (b); \
12770 float32x2_t a_ = (a); \
12771 float32x2_t result; \
12772 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
12773 : "=w"(result) \
12774 : "w"(a_), "w"(b_), "i"(c) \
12775 : /* No clobbers */); \
12776 result; \
12779 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12780 vmulxd_f64 (float64_t a, float64_t b)
12782 float64_t result;
12783 __asm__ ("fmulx %d0, %d1, %d2"
12784 : "=w"(result)
12785 : "w"(a), "w"(b)
12786 : /* No clobbers */);
12787 return result;
12790 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12791 vmulxq_f32 (float32x4_t a, float32x4_t b)
12793 float32x4_t result;
12794 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
12795 : "=w"(result)
12796 : "w"(a), "w"(b)
12797 : /* No clobbers */);
12798 return result;
12801 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12802 vmulxq_f64 (float64x2_t a, float64x2_t b)
12804 float64x2_t result;
12805 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
12806 : "=w"(result)
12807 : "w"(a), "w"(b)
12808 : /* No clobbers */);
12809 return result;
12812 #define vmulxq_lane_f32(a, b, c) \
12813 __extension__ \
12814 ({ \
12815 float32x4_t b_ = (b); \
12816 float32x4_t a_ = (a); \
12817 float32x4_t result; \
12818 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
12819 : "=w"(result) \
12820 : "w"(a_), "w"(b_), "i"(c) \
12821 : /* No clobbers */); \
12822 result; \
12825 #define vmulxq_lane_f64(a, b, c) \
12826 __extension__ \
12827 ({ \
12828 float64x2_t b_ = (b); \
12829 float64x2_t a_ = (a); \
12830 float64x2_t result; \
12831 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
12832 : "=w"(result) \
12833 : "w"(a_), "w"(b_), "i"(c) \
12834 : /* No clobbers */); \
12835 result; \
12838 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12839 vmulxs_f32 (float32_t a, float32_t b)
12841 float32_t result;
12842 __asm__ ("fmulx %s0, %s1, %s2"
12843 : "=w"(result)
12844 : "w"(a), "w"(b)
12845 : /* No clobbers */);
12846 return result;
/* vmvn family: bitwise NOT of every element (MVN).  Because the
   operation is purely bitwise, every element width reuses the byte
   arrangement (.8b for 64-bit vectors, .16b for 128-bit vectors).  */
12849 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12850 vmvn_p8 (poly8x8_t a)
12852 poly8x8_t result;
12853 __asm__ ("mvn %0.8b,%1.8b"
12854 : "=w"(result)
12855 : "w"(a)
12856 : /* No clobbers */);
12857 return result;
12860 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12861 vmvn_s8 (int8x8_t a)
12863 int8x8_t result;
12864 __asm__ ("mvn %0.8b,%1.8b"
12865 : "=w"(result)
12866 : "w"(a)
12867 : /* No clobbers */);
12868 return result;
12871 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12872 vmvn_s16 (int16x4_t a)
12874 int16x4_t result;
12875 __asm__ ("mvn %0.8b,%1.8b"
12876 : "=w"(result)
12877 : "w"(a)
12878 : /* No clobbers */);
12879 return result;
12882 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12883 vmvn_s32 (int32x2_t a)
12885 int32x2_t result;
12886 __asm__ ("mvn %0.8b,%1.8b"
12887 : "=w"(result)
12888 : "w"(a)
12889 : /* No clobbers */);
12890 return result;
12893 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12894 vmvn_u8 (uint8x8_t a)
12896 uint8x8_t result;
12897 __asm__ ("mvn %0.8b,%1.8b"
12898 : "=w"(result)
12899 : "w"(a)
12900 : /* No clobbers */);
12901 return result;
12904 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12905 vmvn_u16 (uint16x4_t a)
12907 uint16x4_t result;
12908 __asm__ ("mvn %0.8b,%1.8b"
12909 : "=w"(result)
12910 : "w"(a)
12911 : /* No clobbers */);
12912 return result;
12915 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12916 vmvn_u32 (uint32x2_t a)
12918 uint32x2_t result;
12919 __asm__ ("mvn %0.8b,%1.8b"
12920 : "=w"(result)
12921 : "w"(a)
12922 : /* No clobbers */);
12923 return result;
/* 128-bit (q) variants: same bitwise NOT on .16b.  */
12926 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12927 vmvnq_p8 (poly8x16_t a)
12929 poly8x16_t result;
12930 __asm__ ("mvn %0.16b,%1.16b"
12931 : "=w"(result)
12932 : "w"(a)
12933 : /* No clobbers */);
12934 return result;
12937 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12938 vmvnq_s8 (int8x16_t a)
12940 int8x16_t result;
12941 __asm__ ("mvn %0.16b,%1.16b"
12942 : "=w"(result)
12943 : "w"(a)
12944 : /* No clobbers */);
12945 return result;
12948 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12949 vmvnq_s16 (int16x8_t a)
12951 int16x8_t result;
12952 __asm__ ("mvn %0.16b,%1.16b"
12953 : "=w"(result)
12954 : "w"(a)
12955 : /* No clobbers */);
12956 return result;
12959 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12960 vmvnq_s32 (int32x4_t a)
12962 int32x4_t result;
12963 __asm__ ("mvn %0.16b,%1.16b"
12964 : "=w"(result)
12965 : "w"(a)
12966 : /* No clobbers */);
12967 return result;
12970 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12971 vmvnq_u8 (uint8x16_t a)
12973 uint8x16_t result;
12974 __asm__ ("mvn %0.16b,%1.16b"
12975 : "=w"(result)
12976 : "w"(a)
12977 : /* No clobbers */);
12978 return result;
12981 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12982 vmvnq_u16 (uint16x8_t a)
12984 uint16x8_t result;
12985 __asm__ ("mvn %0.16b,%1.16b"
12986 : "=w"(result)
12987 : "w"(a)
12988 : /* No clobbers */);
12989 return result;
12992 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12993 vmvnq_u32 (uint32x4_t a)
12995 uint32x4_t result;
12996 __asm__ ("mvn %0.16b,%1.16b"
12997 : "=w"(result)
12998 : "w"(a)
12999 : /* No clobbers */);
13000 return result;
/* vneg family: element-wise negation — FNEG for floating point, NEG for
   signed integer — for 64-bit (vneg_*) and 128-bit (vnegq_*) vectors.
   NOTE(review): these asm versions are candidates for plain "-a" / tree
   intrinsics (cf. the vabs conversion this file header mentions).  */
13003 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13004 vneg_f32 (float32x2_t a)
13006 float32x2_t result;
13007 __asm__ ("fneg %0.2s,%1.2s"
13008 : "=w"(result)
13009 : "w"(a)
13010 : /* No clobbers */);
13011 return result;
13014 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13015 vneg_s8 (int8x8_t a)
13017 int8x8_t result;
13018 __asm__ ("neg %0.8b,%1.8b"
13019 : "=w"(result)
13020 : "w"(a)
13021 : /* No clobbers */);
13022 return result;
13025 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13026 vneg_s16 (int16x4_t a)
13028 int16x4_t result;
13029 __asm__ ("neg %0.4h,%1.4h"
13030 : "=w"(result)
13031 : "w"(a)
13032 : /* No clobbers */);
13033 return result;
13036 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13037 vneg_s32 (int32x2_t a)
13039 int32x2_t result;
13040 __asm__ ("neg %0.2s,%1.2s"
13041 : "=w"(result)
13042 : "w"(a)
13043 : /* No clobbers */);
13044 return result;
13047 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13048 vnegq_f32 (float32x4_t a)
13050 float32x4_t result;
13051 __asm__ ("fneg %0.4s,%1.4s"
13052 : "=w"(result)
13053 : "w"(a)
13054 : /* No clobbers */);
13055 return result;
13058 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13059 vnegq_f64 (float64x2_t a)
13061 float64x2_t result;
13062 __asm__ ("fneg %0.2d,%1.2d"
13063 : "=w"(result)
13064 : "w"(a)
13065 : /* No clobbers */);
13066 return result;
13069 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13070 vnegq_s8 (int8x16_t a)
13072 int8x16_t result;
13073 __asm__ ("neg %0.16b,%1.16b"
13074 : "=w"(result)
13075 : "w"(a)
13076 : /* No clobbers */);
13077 return result;
13080 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13081 vnegq_s16 (int16x8_t a)
13083 int16x8_t result;
13084 __asm__ ("neg %0.8h,%1.8h"
13085 : "=w"(result)
13086 : "w"(a)
13087 : /* No clobbers */);
13088 return result;
13091 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13092 vnegq_s32 (int32x4_t a)
13094 int32x4_t result;
13095 __asm__ ("neg %0.4s,%1.4s"
13096 : "=w"(result)
13097 : "w"(a)
13098 : /* No clobbers */);
13099 return result;
13102 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13103 vnegq_s64 (int64x2_t a)
13105 int64x2_t result;
13106 __asm__ ("neg %0.2d,%1.2d"
13107 : "=w"(result)
13108 : "w"(a)
13109 : /* No clobbers */);
13110 return result;
/* vpadal family: pairwise add-long and accumulate (SADALP/UADALP).  The
   instruction reads and writes its destination register, so the
   accumulator argument "a" uses the "0" constraint — it must be placed
   in the same register as the output.  Only operand %2 (the vector being
   pairwise-widened) appears in the template; %0/"0" carry the running
   sum.  */
13113 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13114 vpadal_s8 (int16x4_t a, int8x8_t b)
13116 int16x4_t result;
13117 __asm__ ("sadalp %0.4h,%2.8b"
13118 : "=w"(result)
13119 : "0"(a), "w"(b)
13120 : /* No clobbers */);
13121 return result;
13124 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13125 vpadal_s16 (int32x2_t a, int16x4_t b)
13127 int32x2_t result;
13128 __asm__ ("sadalp %0.2s,%2.4h"
13129 : "=w"(result)
13130 : "0"(a), "w"(b)
13131 : /* No clobbers */);
13132 return result;
13135 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13136 vpadal_s32 (int64x1_t a, int32x2_t b)
13138 int64x1_t result;
13139 __asm__ ("sadalp %0.1d,%2.2s"
13140 : "=w"(result)
13141 : "0"(a), "w"(b)
13142 : /* No clobbers */);
13143 return result;
13146 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13147 vpadal_u8 (uint16x4_t a, uint8x8_t b)
13149 uint16x4_t result;
13150 __asm__ ("uadalp %0.4h,%2.8b"
13151 : "=w"(result)
13152 : "0"(a), "w"(b)
13153 : /* No clobbers */);
13154 return result;
13157 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13158 vpadal_u16 (uint32x2_t a, uint16x4_t b)
13160 uint32x2_t result;
13161 __asm__ ("uadalp %0.2s,%2.4h"
13162 : "=w"(result)
13163 : "0"(a), "w"(b)
13164 : /* No clobbers */);
13165 return result;
13168 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13169 vpadal_u32 (uint64x1_t a, uint32x2_t b)
13171 uint64x1_t result;
13172 __asm__ ("uadalp %0.1d,%2.2s"
13173 : "=w"(result)
13174 : "0"(a), "w"(b)
13175 : /* No clobbers */);
13176 return result;
/* 128-bit (q) variants.  */
13179 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13180 vpadalq_s8 (int16x8_t a, int8x16_t b)
13182 int16x8_t result;
13183 __asm__ ("sadalp %0.8h,%2.16b"
13184 : "=w"(result)
13185 : "0"(a), "w"(b)
13186 : /* No clobbers */);
13187 return result;
13190 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13191 vpadalq_s16 (int32x4_t a, int16x8_t b)
13193 int32x4_t result;
13194 __asm__ ("sadalp %0.4s,%2.8h"
13195 : "=w"(result)
13196 : "0"(a), "w"(b)
13197 : /* No clobbers */);
13198 return result;
13201 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13202 vpadalq_s32 (int64x2_t a, int32x4_t b)
13204 int64x2_t result;
13205 __asm__ ("sadalp %0.2d,%2.4s"
13206 : "=w"(result)
13207 : "0"(a), "w"(b)
13208 : /* No clobbers */);
13209 return result;
13212 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13213 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
13215 uint16x8_t result;
13216 __asm__ ("uadalp %0.8h,%2.16b"
13217 : "=w"(result)
13218 : "0"(a), "w"(b)
13219 : /* No clobbers */);
13220 return result;
13223 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13224 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
13226 uint32x4_t result;
13227 __asm__ ("uadalp %0.4s,%2.8h"
13228 : "=w"(result)
13229 : "0"(a), "w"(b)
13230 : /* No clobbers */);
13231 return result;
13234 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13235 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
13237 uint64x2_t result;
13238 __asm__ ("uadalp %0.2d,%2.4s"
13239 : "=w"(result)
13240 : "0"(a), "w"(b)
13241 : /* No clobbers */);
13242 return result;
/* vpadd family: pairwise addition of adjacent elements.  The float
   variant and the across-vector scalar reduction (vpaddd_f64, FADDP)
   are inline asm; the integer variants already go through the
   __builtin_aarch64_addp* builtins, with unsigned types routed through
   casts to the signed builtin signatures — the bit pattern is identical
   for addition.  */
13245 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13246 vpadd_f32 (float32x2_t a, float32x2_t b)
13248 float32x2_t result;
13249 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
13250 : "=w"(result)
13251 : "w"(a), "w"(b)
13252 : /* No clobbers */);
13253 return result;
13256 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13257 vpadd_s8 (int8x8_t __a, int8x8_t __b)
13259 return __builtin_aarch64_addpv8qi (__a, __b);
13262 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13263 vpadd_s16 (int16x4_t __a, int16x4_t __b)
13265 return __builtin_aarch64_addpv4hi (__a, __b);
13268 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13269 vpadd_s32 (int32x2_t __a, int32x2_t __b)
13271 return __builtin_aarch64_addpv2si (__a, __b);
13274 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13275 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
13277 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
13278 (int8x8_t) __b);
13281 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13282 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
13284 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
13285 (int16x4_t) __b);
13288 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13289 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
13291 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
13292 (int32x2_t) __b);
/* vpaddd_f64: pairwise add of the two doubles in "a", yielding one
   scalar (FADDP %d0,%1.2d).  */
13295 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13296 vpaddd_f64 (float64x2_t a)
13298 float64_t result;
13299 __asm__ ("faddp %d0,%1.2d"
13300 : "=w"(result)
13301 : "w"(a)
13302 : /* No clobbers */);
13303 return result;
/* vpaddl family: pairwise add-long (SADDLP/UADDLP) — adjacent element
   pairs are summed into elements of twice the width, halving the lane
   count.  64-bit (vpaddl_*) and 128-bit (vpaddlq_*) forms.  */
13306 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13307 vpaddl_s8 (int8x8_t a)
13309 int16x4_t result;
13310 __asm__ ("saddlp %0.4h,%1.8b"
13311 : "=w"(result)
13312 : "w"(a)
13313 : /* No clobbers */);
13314 return result;
13317 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13318 vpaddl_s16 (int16x4_t a)
13320 int32x2_t result;
13321 __asm__ ("saddlp %0.2s,%1.4h"
13322 : "=w"(result)
13323 : "w"(a)
13324 : /* No clobbers */);
13325 return result;
13328 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13329 vpaddl_s32 (int32x2_t a)
13331 int64x1_t result;
13332 __asm__ ("saddlp %0.1d,%1.2s"
13333 : "=w"(result)
13334 : "w"(a)
13335 : /* No clobbers */);
13336 return result;
13339 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13340 vpaddl_u8 (uint8x8_t a)
13342 uint16x4_t result;
13343 __asm__ ("uaddlp %0.4h,%1.8b"
13344 : "=w"(result)
13345 : "w"(a)
13346 : /* No clobbers */);
13347 return result;
13350 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13351 vpaddl_u16 (uint16x4_t a)
13353 uint32x2_t result;
13354 __asm__ ("uaddlp %0.2s,%1.4h"
13355 : "=w"(result)
13356 : "w"(a)
13357 : /* No clobbers */);
13358 return result;
13361 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13362 vpaddl_u32 (uint32x2_t a)
13364 uint64x1_t result;
13365 __asm__ ("uaddlp %0.1d,%1.2s"
13366 : "=w"(result)
13367 : "w"(a)
13368 : /* No clobbers */);
13369 return result;
/* 128-bit (q) variants.  */
13372 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13373 vpaddlq_s8 (int8x16_t a)
13375 int16x8_t result;
13376 __asm__ ("saddlp %0.8h,%1.16b"
13377 : "=w"(result)
13378 : "w"(a)
13379 : /* No clobbers */);
13380 return result;
13383 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13384 vpaddlq_s16 (int16x8_t a)
13386 int32x4_t result;
13387 __asm__ ("saddlp %0.4s,%1.8h"
13388 : "=w"(result)
13389 : "w"(a)
13390 : /* No clobbers */);
13391 return result;
13394 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13395 vpaddlq_s32 (int32x4_t a)
13397 int64x2_t result;
13398 __asm__ ("saddlp %0.2d,%1.4s"
13399 : "=w"(result)
13400 : "w"(a)
13401 : /* No clobbers */);
13402 return result;
13405 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13406 vpaddlq_u8 (uint8x16_t a)
13408 uint16x8_t result;
13409 __asm__ ("uaddlp %0.8h,%1.16b"
13410 : "=w"(result)
13411 : "w"(a)
13412 : /* No clobbers */);
13413 return result;
13416 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13417 vpaddlq_u16 (uint16x8_t a)
13419 uint32x4_t result;
13420 __asm__ ("uaddlp %0.4s,%1.8h"
13421 : "=w"(result)
13422 : "w"(a)
13423 : /* No clobbers */);
13424 return result;
13427 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13428 vpaddlq_u32 (uint32x4_t a)
13430 uint64x2_t result;
13431 __asm__ ("uaddlp %0.2d,%1.4s"
13432 : "=w"(result)
13433 : "w"(a)
13434 : /* No clobbers */);
13435 return result;
/* vpaddq family: 128-bit pairwise addition of two vectors (FADDP for
   float, ADDP for integer), plus vpadds_f32 — the scalar reduction that
   adds the two floats of a float32x2_t (FADDP %s0,%1.2s).  */
13438 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13439 vpaddq_f32 (float32x4_t a, float32x4_t b)
13441 float32x4_t result;
13442 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
13443 : "=w"(result)
13444 : "w"(a), "w"(b)
13445 : /* No clobbers */);
13446 return result;
13449 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13450 vpaddq_f64 (float64x2_t a, float64x2_t b)
13452 float64x2_t result;
13453 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
13454 : "=w"(result)
13455 : "w"(a), "w"(b)
13456 : /* No clobbers */);
13457 return result;
13460 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13461 vpaddq_s8 (int8x16_t a, int8x16_t b)
13463 int8x16_t result;
13464 __asm__ ("addp %0.16b,%1.16b,%2.16b"
13465 : "=w"(result)
13466 : "w"(a), "w"(b)
13467 : /* No clobbers */);
13468 return result;
13471 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13472 vpaddq_s16 (int16x8_t a, int16x8_t b)
13474 int16x8_t result;
13475 __asm__ ("addp %0.8h,%1.8h,%2.8h"
13476 : "=w"(result)
13477 : "w"(a), "w"(b)
13478 : /* No clobbers */);
13479 return result;
13482 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13483 vpaddq_s32 (int32x4_t a, int32x4_t b)
13485 int32x4_t result;
13486 __asm__ ("addp %0.4s,%1.4s,%2.4s"
13487 : "=w"(result)
13488 : "w"(a), "w"(b)
13489 : /* No clobbers */);
13490 return result;
13493 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13494 vpaddq_s64 (int64x2_t a, int64x2_t b)
13496 int64x2_t result;
13497 __asm__ ("addp %0.2d,%1.2d,%2.2d"
13498 : "=w"(result)
13499 : "w"(a), "w"(b)
13500 : /* No clobbers */);
13501 return result;
13504 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13505 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
13507 uint8x16_t result;
13508 __asm__ ("addp %0.16b,%1.16b,%2.16b"
13509 : "=w"(result)
13510 : "w"(a), "w"(b)
13511 : /* No clobbers */);
13512 return result;
13515 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13516 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
13518 uint16x8_t result;
13519 __asm__ ("addp %0.8h,%1.8h,%2.8h"
13520 : "=w"(result)
13521 : "w"(a), "w"(b)
13522 : /* No clobbers */);
13523 return result;
13526 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13527 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
13529 uint32x4_t result;
13530 __asm__ ("addp %0.4s,%1.4s,%2.4s"
13531 : "=w"(result)
13532 : "w"(a), "w"(b)
13533 : /* No clobbers */);
13534 return result;
13537 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13538 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
13540 uint64x2_t result;
13541 __asm__ ("addp %0.2d,%1.2d,%2.2d"
13542 : "=w"(result)
13543 : "w"(a), "w"(b)
13544 : /* No clobbers */);
13545 return result;
/* Scalar pairwise-add reduction of a 2-lane float vector.  */
13548 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13549 vpadds_f32 (float32x2_t a)
13551 float32_t result;
13552 __asm__ ("faddp %s0,%1.2s"
13553 : "=w"(result)
13554 : "w"(a)
13555 : /* No clobbers */);
13556 return result;
/* vpmax family: pairwise maximum of adjacent element pairs drawn from
   both operands (FMAXP for float, SMAXP/UMAXP for signed/unsigned
   integers), on 64-bit vectors.  */
13559 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13560 vpmax_f32 (float32x2_t a, float32x2_t b)
13562 float32x2_t result;
13563 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
13564 : "=w"(result)
13565 : "w"(a), "w"(b)
13566 : /* No clobbers */);
13567 return result;
13570 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13571 vpmax_s8 (int8x8_t a, int8x8_t b)
13573 int8x8_t result;
13574 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
13575 : "=w"(result)
13576 : "w"(a), "w"(b)
13577 : /* No clobbers */);
13578 return result;
13581 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13582 vpmax_s16 (int16x4_t a, int16x4_t b)
13584 int16x4_t result;
13585 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
13586 : "=w"(result)
13587 : "w"(a), "w"(b)
13588 : /* No clobbers */);
13589 return result;
13592 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13593 vpmax_s32 (int32x2_t a, int32x2_t b)
13595 int32x2_t result;
13596 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
13597 : "=w"(result)
13598 : "w"(a), "w"(b)
13599 : /* No clobbers */);
13600 return result;
13603 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13604 vpmax_u8 (uint8x8_t a, uint8x8_t b)
13606 uint8x8_t result;
13607 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
13608 : "=w"(result)
13609 : "w"(a), "w"(b)
13610 : /* No clobbers */);
13611 return result;
13614 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13615 vpmax_u16 (uint16x4_t a, uint16x4_t b)
13617 uint16x4_t result;
13618 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
13619 : "=w"(result)
13620 : "w"(a), "w"(b)
13621 : /* No clobbers */);
13622 return result;
13625 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13626 vpmax_u32 (uint32x2_t a, uint32x2_t b)
13628 uint32x2_t result;
13629 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
13630 : "=w"(result)
13631 : "w"(a), "w"(b)
13632 : /* No clobbers */);
13633 return result;
13636 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13637 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
13639 float32x2_t result;
13640 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
13641 : "=w"(result)
13642 : "w"(a), "w"(b)
13643 : /* No clobbers */);
13644 return result;
13647 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13648 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
13650 float32x4_t result;
13651 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
13652 : "=w"(result)
13653 : "w"(a), "w"(b)
13654 : /* No clobbers */);
13655 return result;
13658 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13659 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
13661 float64x2_t result;
13662 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
13663 : "=w"(result)
13664 : "w"(a), "w"(b)
13665 : /* No clobbers */);
13666 return result;
13669 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13670 vpmaxnmqd_f64 (float64x2_t a)
13672 float64_t result;
13673 __asm__ ("fmaxnmp %d0,%1.2d"
13674 : "=w"(result)
13675 : "w"(a)
13676 : /* No clobbers */);
13677 return result;
13680 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13681 vpmaxnms_f32 (float32x2_t a)
13683 float32_t result;
13684 __asm__ ("fmaxnmp %s0,%1.2s"
13685 : "=w"(result)
13686 : "w"(a)
13687 : /* No clobbers */);
13688 return result;
13691 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13692 vpmaxq_f32 (float32x4_t a, float32x4_t b)
13694 float32x4_t result;
13695 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
13696 : "=w"(result)
13697 : "w"(a), "w"(b)
13698 : /* No clobbers */);
13699 return result;
13702 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13703 vpmaxq_f64 (float64x2_t a, float64x2_t b)
13705 float64x2_t result;
13706 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
13707 : "=w"(result)
13708 : "w"(a), "w"(b)
13709 : /* No clobbers */);
13710 return result;
13713 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13714 vpmaxq_s8 (int8x16_t a, int8x16_t b)
13716 int8x16_t result;
13717 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
13718 : "=w"(result)
13719 : "w"(a), "w"(b)
13720 : /* No clobbers */);
13721 return result;
13724 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13725 vpmaxq_s16 (int16x8_t a, int16x8_t b)
13727 int16x8_t result;
13728 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
13729 : "=w"(result)
13730 : "w"(a), "w"(b)
13731 : /* No clobbers */);
13732 return result;
13735 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13736 vpmaxq_s32 (int32x4_t a, int32x4_t b)
13738 int32x4_t result;
13739 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
13740 : "=w"(result)
13741 : "w"(a), "w"(b)
13742 : /* No clobbers */);
13743 return result;
13746 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13747 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
13749 uint8x16_t result;
13750 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
13751 : "=w"(result)
13752 : "w"(a), "w"(b)
13753 : /* No clobbers */);
13754 return result;
13757 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13758 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
13760 uint16x8_t result;
13761 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
13762 : "=w"(result)
13763 : "w"(a), "w"(b)
13764 : /* No clobbers */);
13765 return result;
13768 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13769 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
13771 uint32x4_t result;
13772 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
13773 : "=w"(result)
13774 : "w"(a), "w"(b)
13775 : /* No clobbers */);
13776 return result;
13779 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13780 vpmaxqd_f64 (float64x2_t a)
13782 float64_t result;
13783 __asm__ ("fmaxp %d0,%1.2d"
13784 : "=w"(result)
13785 : "w"(a)
13786 : /* No clobbers */);
13787 return result;
13790 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13791 vpmaxs_f32 (float32x2_t a)
13793 float32_t result;
13794 __asm__ ("fmaxp %s0,%1.2s"
13795 : "=w"(result)
13796 : "w"(a)
13797 : /* No clobbers */);
13798 return result;
13801 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13802 vpmin_f32 (float32x2_t a, float32x2_t b)
13804 float32x2_t result;
13805 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
13806 : "=w"(result)
13807 : "w"(a), "w"(b)
13808 : /* No clobbers */);
13809 return result;
13812 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13813 vpmin_s8 (int8x8_t a, int8x8_t b)
13815 int8x8_t result;
13816 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
13817 : "=w"(result)
13818 : "w"(a), "w"(b)
13819 : /* No clobbers */);
13820 return result;
13823 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13824 vpmin_s16 (int16x4_t a, int16x4_t b)
13826 int16x4_t result;
13827 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
13828 : "=w"(result)
13829 : "w"(a), "w"(b)
13830 : /* No clobbers */);
13831 return result;
13834 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13835 vpmin_s32 (int32x2_t a, int32x2_t b)
13837 int32x2_t result;
13838 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
13839 : "=w"(result)
13840 : "w"(a), "w"(b)
13841 : /* No clobbers */);
13842 return result;
13845 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13846 vpmin_u8 (uint8x8_t a, uint8x8_t b)
13848 uint8x8_t result;
13849 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
13850 : "=w"(result)
13851 : "w"(a), "w"(b)
13852 : /* No clobbers */);
13853 return result;
13856 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13857 vpmin_u16 (uint16x4_t a, uint16x4_t b)
13859 uint16x4_t result;
13860 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
13861 : "=w"(result)
13862 : "w"(a), "w"(b)
13863 : /* No clobbers */);
13864 return result;
13867 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13868 vpmin_u32 (uint32x2_t a, uint32x2_t b)
13870 uint32x2_t result;
13871 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
13872 : "=w"(result)
13873 : "w"(a), "w"(b)
13874 : /* No clobbers */);
13875 return result;
13878 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13879 vpminnm_f32 (float32x2_t a, float32x2_t b)
13881 float32x2_t result;
13882 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
13883 : "=w"(result)
13884 : "w"(a), "w"(b)
13885 : /* No clobbers */);
13886 return result;
13889 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13890 vpminnmq_f32 (float32x4_t a, float32x4_t b)
13892 float32x4_t result;
13893 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
13894 : "=w"(result)
13895 : "w"(a), "w"(b)
13896 : /* No clobbers */);
13897 return result;
13900 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13901 vpminnmq_f64 (float64x2_t a, float64x2_t b)
13903 float64x2_t result;
13904 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
13905 : "=w"(result)
13906 : "w"(a), "w"(b)
13907 : /* No clobbers */);
13908 return result;
13911 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13912 vpminnmqd_f64 (float64x2_t a)
13914 float64_t result;
13915 __asm__ ("fminnmp %d0,%1.2d"
13916 : "=w"(result)
13917 : "w"(a)
13918 : /* No clobbers */);
13919 return result;
13922 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13923 vpminnms_f32 (float32x2_t a)
13925 float32_t result;
13926 __asm__ ("fminnmp %s0,%1.2s"
13927 : "=w"(result)
13928 : "w"(a)
13929 : /* No clobbers */);
13930 return result;
13933 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13934 vpminq_f32 (float32x4_t a, float32x4_t b)
13936 float32x4_t result;
13937 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
13938 : "=w"(result)
13939 : "w"(a), "w"(b)
13940 : /* No clobbers */);
13941 return result;
13944 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13945 vpminq_f64 (float64x2_t a, float64x2_t b)
13947 float64x2_t result;
13948 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
13949 : "=w"(result)
13950 : "w"(a), "w"(b)
13951 : /* No clobbers */);
13952 return result;
13955 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13956 vpminq_s8 (int8x16_t a, int8x16_t b)
13958 int8x16_t result;
13959 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
13960 : "=w"(result)
13961 : "w"(a), "w"(b)
13962 : /* No clobbers */);
13963 return result;
13966 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13967 vpminq_s16 (int16x8_t a, int16x8_t b)
13969 int16x8_t result;
13970 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
13971 : "=w"(result)
13972 : "w"(a), "w"(b)
13973 : /* No clobbers */);
13974 return result;
13977 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13978 vpminq_s32 (int32x4_t a, int32x4_t b)
13980 int32x4_t result;
13981 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
13982 : "=w"(result)
13983 : "w"(a), "w"(b)
13984 : /* No clobbers */);
13985 return result;
13988 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13989 vpminq_u8 (uint8x16_t a, uint8x16_t b)
13991 uint8x16_t result;
13992 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
13993 : "=w"(result)
13994 : "w"(a), "w"(b)
13995 : /* No clobbers */);
13996 return result;
13999 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14000 vpminq_u16 (uint16x8_t a, uint16x8_t b)
14002 uint16x8_t result;
14003 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
14004 : "=w"(result)
14005 : "w"(a), "w"(b)
14006 : /* No clobbers */);
14007 return result;
14010 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14011 vpminq_u32 (uint32x4_t a, uint32x4_t b)
14013 uint32x4_t result;
14014 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
14015 : "=w"(result)
14016 : "w"(a), "w"(b)
14017 : /* No clobbers */);
14018 return result;
14021 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
14022 vpminqd_f64 (float64x2_t a)
14024 float64_t result;
14025 __asm__ ("fminp %d0,%1.2d"
14026 : "=w"(result)
14027 : "w"(a)
14028 : /* No clobbers */);
14029 return result;
14032 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
14033 vpmins_f32 (float32x2_t a)
14035 float32_t result;
14036 __asm__ ("fminp %s0,%1.2s"
14037 : "=w"(result)
14038 : "w"(a)
14039 : /* No clobbers */);
14040 return result;
14043 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14044 vqdmulh_n_s16 (int16x4_t a, int16_t b)
14046 int16x4_t result;
14047 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
14048 : "=w"(result)
14049 : "w"(a), "w"(b)
14050 : /* No clobbers */);
14051 return result;
14054 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14055 vqdmulh_n_s32 (int32x2_t a, int32_t b)
14057 int32x2_t result;
14058 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
14059 : "=w"(result)
14060 : "w"(a), "w"(b)
14061 : /* No clobbers */);
14062 return result;
14065 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14066 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
14068 int16x8_t result;
14069 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
14070 : "=w"(result)
14071 : "w"(a), "w"(b)
14072 : /* No clobbers */);
14073 return result;
14076 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14077 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
14079 int32x4_t result;
14080 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
14081 : "=w"(result)
14082 : "w"(a), "w"(b)
14083 : /* No clobbers */);
14084 return result;
14087 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14088 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
14090 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
14091 __asm__ ("sqxtn2 %0.16b, %1.8h"
14092 : "+w"(result)
14093 : "w"(b)
14094 : /* No clobbers */);
14095 return result;
14098 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14099 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
14101 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
14102 __asm__ ("sqxtn2 %0.8h, %1.4s"
14103 : "+w"(result)
14104 : "w"(b)
14105 : /* No clobbers */);
14106 return result;
14109 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14110 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
14112 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
14113 __asm__ ("sqxtn2 %0.4s, %1.2d"
14114 : "+w"(result)
14115 : "w"(b)
14116 : /* No clobbers */);
14117 return result;
14120 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14121 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
14123 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
14124 __asm__ ("uqxtn2 %0.16b, %1.8h"
14125 : "+w"(result)
14126 : "w"(b)
14127 : /* No clobbers */);
14128 return result;
14131 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14132 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
14134 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
14135 __asm__ ("uqxtn2 %0.8h, %1.4s"
14136 : "+w"(result)
14137 : "w"(b)
14138 : /* No clobbers */);
14139 return result;
14142 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14143 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
14145 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
14146 __asm__ ("uqxtn2 %0.4s, %1.2d"
14147 : "+w"(result)
14148 : "w"(b)
14149 : /* No clobbers */);
14150 return result;
14153 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14154 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
14156 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
14157 __asm__ ("sqxtun2 %0.16b, %1.8h"
14158 : "+w"(result)
14159 : "w"(b)
14160 : /* No clobbers */);
14161 return result;
14164 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14165 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
14167 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
14168 __asm__ ("sqxtun2 %0.8h, %1.4s"
14169 : "+w"(result)
14170 : "w"(b)
14171 : /* No clobbers */);
14172 return result;
14175 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14176 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
14178 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
14179 __asm__ ("sqxtun2 %0.4s, %1.2d"
14180 : "+w"(result)
14181 : "w"(b)
14182 : /* No clobbers */);
14183 return result;
14186 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14187 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
14189 int16x4_t result;
14190 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
14191 : "=w"(result)
14192 : "w"(a), "w"(b)
14193 : /* No clobbers */);
14194 return result;
14197 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14198 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
14200 int32x2_t result;
14201 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
14202 : "=w"(result)
14203 : "w"(a), "w"(b)
14204 : /* No clobbers */);
14205 return result;
14208 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14209 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
14211 int16x8_t result;
14212 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
14213 : "=w"(result)
14214 : "w"(a), "w"(b)
14215 : /* No clobbers */);
14216 return result;
14219 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14220 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
14222 int32x4_t result;
14223 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
14224 : "=w"(result)
14225 : "w"(a), "w"(b)
14226 : /* No clobbers */);
14227 return result;
/* Saturating rounding shift-right-narrow into the high half.  These
   must be macros because the shift count C has to be an immediate
   ("i" constraint).  Each one seeds the destination with the low half
   A, then SQRSHRN2/UQRSHRN2/SQRSHRUN2 fills the upper half from B.  */

#define vqrshrn_high_n_s16(a, b, c)				\
  __extension__							\
    ({								\
       int16x8_t wide_ = (b);					\
       int8x8_t lo_ = (a);					\
       int8x16_t ret_ = vcombine_s8				\
			  (lo_, vcreate_s8 (UINT64_C (0x0)));	\
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrn_high_n_s32(a, b, c)				\
  __extension__							\
    ({								\
       int32x4_t wide_ = (b);					\
       int16x4_t lo_ = (a);					\
       int16x8_t ret_ = vcombine_s16				\
			  (lo_, vcreate_s16 (UINT64_C (0x0)));	\
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrn_high_n_s64(a, b, c)				\
  __extension__							\
    ({								\
       int64x2_t wide_ = (b);					\
       int32x2_t lo_ = (a);					\
       int32x4_t ret_ = vcombine_s32				\
			  (lo_, vcreate_s32 (UINT64_C (0x0)));	\
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrn_high_n_u16(a, b, c)				\
  __extension__							\
    ({								\
       uint16x8_t wide_ = (b);					\
       uint8x8_t lo_ = (a);					\
       uint8x16_t ret_ = vcombine_u8				\
			   (lo_, vcreate_u8 (UINT64_C (0x0)));	\
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrn_high_n_u32(a, b, c)				\
  __extension__							\
    ({								\
       uint32x4_t wide_ = (b);					\
       uint16x4_t lo_ = (a);					\
       uint16x8_t ret_ = vcombine_u16				\
			   (lo_, vcreate_u16 (UINT64_C (0x0)));	\
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrn_high_n_u64(a, b, c)				\
  __extension__							\
    ({								\
       uint64x2_t wide_ = (b);					\
       uint32x2_t lo_ = (a);					\
       uint32x4_t ret_ = vcombine_u32				\
			   (lo_, vcreate_u32 (UINT64_C (0x0)));	\
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrun_high_n_s16(a, b, c)				\
  __extension__							\
    ({								\
       int16x8_t wide_ = (b);					\
       uint8x8_t lo_ = (a);					\
       uint8x16_t ret_ = vcombine_u8				\
			   (lo_, vcreate_u8 (UINT64_C (0x0)));	\
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrun_high_n_s32(a, b, c)				\
  __extension__							\
    ({								\
       int32x4_t wide_ = (b);					\
       uint16x4_t lo_ = (a);					\
       uint16x8_t ret_ = vcombine_u16				\
			   (lo_, vcreate_u16 (UINT64_C (0x0)));	\
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqrshrun_high_n_s64(a, b, c)				\
  __extension__							\
    ({								\
       int64x2_t wide_ = (b);					\
       uint32x2_t lo_ = (a);					\
       uint32x4_t ret_ = vcombine_u32				\
			   (lo_, vcreate_u32 (UINT64_C (0x0)));	\
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })
/* Saturating (non-rounding) shift-right-narrow into the high half.
   Macro form for the same reason as the vqrshrn_high_n_* family: the
   shift count C must be an "i" (immediate) operand.  */

#define vqshrn_high_n_s16(a, b, c)				\
  __extension__							\
    ({								\
       int16x8_t wide_ = (b);					\
       int8x8_t lo_ = (a);					\
       int8x16_t ret_ = vcombine_s8				\
			  (lo_, vcreate_s8 (UINT64_C (0x0)));	\
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrn_high_n_s32(a, b, c)				\
  __extension__							\
    ({								\
       int32x4_t wide_ = (b);					\
       int16x4_t lo_ = (a);					\
       int16x8_t ret_ = vcombine_s16				\
			  (lo_, vcreate_s16 (UINT64_C (0x0)));	\
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrn_high_n_s64(a, b, c)				\
  __extension__							\
    ({								\
       int64x2_t wide_ = (b);					\
       int32x2_t lo_ = (a);					\
       int32x4_t ret_ = vcombine_s32				\
			  (lo_, vcreate_s32 (UINT64_C (0x0)));	\
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrn_high_n_u16(a, b, c)				\
  __extension__							\
    ({								\
       uint16x8_t wide_ = (b);					\
       uint8x8_t lo_ = (a);					\
       uint8x16_t ret_ = vcombine_u8				\
			   (lo_, vcreate_u8 (UINT64_C (0x0)));	\
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrn_high_n_u32(a, b, c)				\
  __extension__							\
    ({								\
       uint32x4_t wide_ = (b);					\
       uint16x4_t lo_ = (a);					\
       uint16x8_t ret_ = vcombine_u16				\
			   (lo_, vcreate_u16 (UINT64_C (0x0)));	\
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrn_high_n_u64(a, b, c)				\
  __extension__							\
    ({								\
       uint64x2_t wide_ = (b);					\
       uint32x2_t lo_ = (a);					\
       uint32x4_t ret_ = vcombine_u32				\
			   (lo_, vcreate_u32 (UINT64_C (0x0)));	\
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrun_high_n_s16(a, b, c)				\
  __extension__							\
    ({								\
       int16x8_t wide_ = (b);					\
       uint8x8_t lo_ = (a);					\
       uint8x16_t ret_ = vcombine_u8				\
			   (lo_, vcreate_u8 (UINT64_C (0x0)));	\
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrun_high_n_s32(a, b, c)				\
  __extension__							\
    ({								\
       int32x4_t wide_ = (b);					\
       uint16x4_t lo_ = (a);					\
       uint16x8_t ret_ = vcombine_u16				\
			   (lo_, vcreate_u16 (UINT64_C (0x0)));	\
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })

#define vqshrun_high_n_s64(a, b, c)				\
  __extension__							\
    ({								\
       int64x2_t wide_ = (b);					\
       uint32x2_t lo_ = (a);					\
       uint32x4_t ret_ = vcombine_u32				\
			   (lo_, vcreate_u32 (UINT64_C (0x0)));	\
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"			\
		: "+w"(ret_)					\
		: "w"(wide_), "i"(c)				\
		: /* No clobbers */);				\
       ret_;							\
     })
14482 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14483 vrbit_s8 (int8x8_t a)
14485 int8x8_t result;
14486 __asm__ ("rbit %0.8b,%1.8b"
14487 : "=w"(result)
14488 : "w"(a)
14489 : /* No clobbers */);
14490 return result;
14493 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14494 vrbit_u8 (uint8x8_t a)
14496 uint8x8_t result;
14497 __asm__ ("rbit %0.8b,%1.8b"
14498 : "=w"(result)
14499 : "w"(a)
14500 : /* No clobbers */);
14501 return result;
14504 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14505 vrbitq_s8 (int8x16_t a)
14507 int8x16_t result;
14508 __asm__ ("rbit %0.16b,%1.16b"
14509 : "=w"(result)
14510 : "w"(a)
14511 : /* No clobbers */);
14512 return result;
14515 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14516 vrbitq_u8 (uint8x16_t a)
14518 uint8x16_t result;
14519 __asm__ ("rbit %0.16b,%1.16b"
14520 : "=w"(result)
14521 : "w"(a)
14522 : /* No clobbers */);
14523 return result;
14526 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14527 vrecpe_u32 (uint32x2_t a)
14529 uint32x2_t result;
14530 __asm__ ("urecpe %0.2s,%1.2s"
14531 : "=w"(result)
14532 : "w"(a)
14533 : /* No clobbers */);
14534 return result;
14537 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14538 vrecpeq_u32 (uint32x4_t a)
14540 uint32x4_t result;
14541 __asm__ ("urecpe %0.4s,%1.4s"
14542 : "=w"(result)
14543 : "w"(a)
14544 : /* No clobbers */);
14545 return result;
14548 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14549 vrev16_p8 (poly8x8_t a)
14551 poly8x8_t result;
14552 __asm__ ("rev16 %0.8b,%1.8b"
14553 : "=w"(result)
14554 : "w"(a)
14555 : /* No clobbers */);
14556 return result;
14559 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14560 vrev16_s8 (int8x8_t a)
14562 int8x8_t result;
14563 __asm__ ("rev16 %0.8b,%1.8b"
14564 : "=w"(result)
14565 : "w"(a)
14566 : /* No clobbers */);
14567 return result;
14570 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14571 vrev16_u8 (uint8x8_t a)
14573 uint8x8_t result;
14574 __asm__ ("rev16 %0.8b,%1.8b"
14575 : "=w"(result)
14576 : "w"(a)
14577 : /* No clobbers */);
14578 return result;
14581 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14582 vrev16q_p8 (poly8x16_t a)
14584 poly8x16_t result;
14585 __asm__ ("rev16 %0.16b,%1.16b"
14586 : "=w"(result)
14587 : "w"(a)
14588 : /* No clobbers */);
14589 return result;
14592 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14593 vrev16q_s8 (int8x16_t a)
14595 int8x16_t result;
14596 __asm__ ("rev16 %0.16b,%1.16b"
14597 : "=w"(result)
14598 : "w"(a)
14599 : /* No clobbers */);
14600 return result;
14603 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14604 vrev16q_u8 (uint8x16_t a)
14606 uint8x16_t result;
14607 __asm__ ("rev16 %0.16b,%1.16b"
14608 : "=w"(result)
14609 : "w"(a)
14610 : /* No clobbers */);
14611 return result;
14614 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14615 vrev32_p8 (poly8x8_t a)
14617 poly8x8_t result;
14618 __asm__ ("rev32 %0.8b,%1.8b"
14619 : "=w"(result)
14620 : "w"(a)
14621 : /* No clobbers */);
14622 return result;
14625 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14626 vrev32_p16 (poly16x4_t a)
14628 poly16x4_t result;
14629 __asm__ ("rev32 %0.4h,%1.4h"
14630 : "=w"(result)
14631 : "w"(a)
14632 : /* No clobbers */);
14633 return result;
14636 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14637 vrev32_s8 (int8x8_t a)
14639 int8x8_t result;
14640 __asm__ ("rev32 %0.8b,%1.8b"
14641 : "=w"(result)
14642 : "w"(a)
14643 : /* No clobbers */);
14644 return result;
14647 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14648 vrev32_s16 (int16x4_t a)
14650 int16x4_t result;
14651 __asm__ ("rev32 %0.4h,%1.4h"
14652 : "=w"(result)
14653 : "w"(a)
14654 : /* No clobbers */);
14655 return result;
14658 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14659 vrev32_u8 (uint8x8_t a)
14661 uint8x8_t result;
14662 __asm__ ("rev32 %0.8b,%1.8b"
14663 : "=w"(result)
14664 : "w"(a)
14665 : /* No clobbers */);
14666 return result;
14669 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14670 vrev32_u16 (uint16x4_t a)
14672 uint16x4_t result;
14673 __asm__ ("rev32 %0.4h,%1.4h"
14674 : "=w"(result)
14675 : "w"(a)
14676 : /* No clobbers */);
14677 return result;
14680 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14681 vrev32q_p8 (poly8x16_t a)
14683 poly8x16_t result;
14684 __asm__ ("rev32 %0.16b,%1.16b"
14685 : "=w"(result)
14686 : "w"(a)
14687 : /* No clobbers */);
14688 return result;
14691 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14692 vrev32q_p16 (poly16x8_t a)
14694 poly16x8_t result;
14695 __asm__ ("rev32 %0.8h,%1.8h"
14696 : "=w"(result)
14697 : "w"(a)
14698 : /* No clobbers */);
14699 return result;
14702 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14703 vrev32q_s8 (int8x16_t a)
14705 int8x16_t result;
14706 __asm__ ("rev32 %0.16b,%1.16b"
14707 : "=w"(result)
14708 : "w"(a)
14709 : /* No clobbers */);
14710 return result;
14713 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14714 vrev32q_s16 (int16x8_t a)
14716 int16x8_t result;
14717 __asm__ ("rev32 %0.8h,%1.8h"
14718 : "=w"(result)
14719 : "w"(a)
14720 : /* No clobbers */);
14721 return result;
14724 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14725 vrev32q_u8 (uint8x16_t a)
14727 uint8x16_t result;
14728 __asm__ ("rev32 %0.16b,%1.16b"
14729 : "=w"(result)
14730 : "w"(a)
14731 : /* No clobbers */);
14732 return result;
14735 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14736 vrev32q_u16 (uint16x8_t a)
14738 uint16x8_t result;
14739 __asm__ ("rev32 %0.8h,%1.8h"
14740 : "=w"(result)
14741 : "w"(a)
14742 : /* No clobbers */);
14743 return result;
14746 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14747 vrev64_f32 (float32x2_t a)
14749 float32x2_t result;
14750 __asm__ ("rev64 %0.2s,%1.2s"
14751 : "=w"(result)
14752 : "w"(a)
14753 : /* No clobbers */);
14754 return result;
14757 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14758 vrev64_p8 (poly8x8_t a)
14760 poly8x8_t result;
14761 __asm__ ("rev64 %0.8b,%1.8b"
14762 : "=w"(result)
14763 : "w"(a)
14764 : /* No clobbers */);
14765 return result;
14768 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14769 vrev64_p16 (poly16x4_t a)
14771 poly16x4_t result;
14772 __asm__ ("rev64 %0.4h,%1.4h"
14773 : "=w"(result)
14774 : "w"(a)
14775 : /* No clobbers */);
14776 return result;
14779 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14780 vrev64_s8 (int8x8_t a)
14782 int8x8_t result;
14783 __asm__ ("rev64 %0.8b,%1.8b"
14784 : "=w"(result)
14785 : "w"(a)
14786 : /* No clobbers */);
14787 return result;
14790 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14791 vrev64_s16 (int16x4_t a)
14793 int16x4_t result;
14794 __asm__ ("rev64 %0.4h,%1.4h"
14795 : "=w"(result)
14796 : "w"(a)
14797 : /* No clobbers */);
14798 return result;
14801 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14802 vrev64_s32 (int32x2_t a)
14804 int32x2_t result;
14805 __asm__ ("rev64 %0.2s,%1.2s"
14806 : "=w"(result)
14807 : "w"(a)
14808 : /* No clobbers */);
14809 return result;
14812 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14813 vrev64_u8 (uint8x8_t a)
14815 uint8x8_t result;
14816 __asm__ ("rev64 %0.8b,%1.8b"
14817 : "=w"(result)
14818 : "w"(a)
14819 : /* No clobbers */);
14820 return result;
14823 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14824 vrev64_u16 (uint16x4_t a)
14826 uint16x4_t result;
14827 __asm__ ("rev64 %0.4h,%1.4h"
14828 : "=w"(result)
14829 : "w"(a)
14830 : /* No clobbers */);
14831 return result;
14834 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14835 vrev64_u32 (uint32x2_t a)
14837 uint32x2_t result;
14838 __asm__ ("rev64 %0.2s,%1.2s"
14839 : "=w"(result)
14840 : "w"(a)
14841 : /* No clobbers */);
14842 return result;
14845 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14846 vrev64q_f32 (float32x4_t a)
14848 float32x4_t result;
14849 __asm__ ("rev64 %0.4s,%1.4s"
14850 : "=w"(result)
14851 : "w"(a)
14852 : /* No clobbers */);
14853 return result;
14856 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14857 vrev64q_p8 (poly8x16_t a)
14859 poly8x16_t result;
14860 __asm__ ("rev64 %0.16b,%1.16b"
14861 : "=w"(result)
14862 : "w"(a)
14863 : /* No clobbers */);
14864 return result;
14867 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14868 vrev64q_p16 (poly16x8_t a)
14870 poly16x8_t result;
14871 __asm__ ("rev64 %0.8h,%1.8h"
14872 : "=w"(result)
14873 : "w"(a)
14874 : /* No clobbers */);
14875 return result;
14878 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14879 vrev64q_s8 (int8x16_t a)
14881 int8x16_t result;
14882 __asm__ ("rev64 %0.16b,%1.16b"
14883 : "=w"(result)
14884 : "w"(a)
14885 : /* No clobbers */);
14886 return result;
14889 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14890 vrev64q_s16 (int16x8_t a)
14892 int16x8_t result;
14893 __asm__ ("rev64 %0.8h,%1.8h"
14894 : "=w"(result)
14895 : "w"(a)
14896 : /* No clobbers */);
14897 return result;
14900 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14901 vrev64q_s32 (int32x4_t a)
14903 int32x4_t result;
14904 __asm__ ("rev64 %0.4s,%1.4s"
14905 : "=w"(result)
14906 : "w"(a)
14907 : /* No clobbers */);
14908 return result;
14911 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14912 vrev64q_u8 (uint8x16_t a)
14914 uint8x16_t result;
14915 __asm__ ("rev64 %0.16b,%1.16b"
14916 : "=w"(result)
14917 : "w"(a)
14918 : /* No clobbers */);
14919 return result;
14922 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14923 vrev64q_u16 (uint16x8_t a)
14925 uint16x8_t result;
14926 __asm__ ("rev64 %0.8h,%1.8h"
14927 : "=w"(result)
14928 : "w"(a)
14929 : /* No clobbers */);
14930 return result;
14933 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14934 vrev64q_u32 (uint32x4_t a)
14936 uint32x4_t result;
14937 __asm__ ("rev64 %0.4s,%1.4s"
14938 : "=w"(result)
14939 : "w"(a)
14940 : /* No clobbers */);
14941 return result;
14944 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14945 vrnd_f32 (float32x2_t a)
14947 float32x2_t result;
14948 __asm__ ("frintz %0.2s,%1.2s"
14949 : "=w"(result)
14950 : "w"(a)
14951 : /* No clobbers */);
14952 return result;
14955 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14956 vrnda_f32 (float32x2_t a)
14958 float32x2_t result;
14959 __asm__ ("frinta %0.2s,%1.2s"
14960 : "=w"(result)
14961 : "w"(a)
14962 : /* No clobbers */);
14963 return result;
14966 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14967 vrndm_f32 (float32x2_t a)
14969 float32x2_t result;
14970 __asm__ ("frintm %0.2s,%1.2s"
14971 : "=w"(result)
14972 : "w"(a)
14973 : /* No clobbers */);
14974 return result;
14977 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14978 vrndn_f32 (float32x2_t a)
14980 float32x2_t result;
14981 __asm__ ("frintn %0.2s,%1.2s"
14982 : "=w"(result)
14983 : "w"(a)
14984 : /* No clobbers */);
14985 return result;
14988 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14989 vrndp_f32 (float32x2_t a)
14991 float32x2_t result;
14992 __asm__ ("frintp %0.2s,%1.2s"
14993 : "=w"(result)
14994 : "w"(a)
14995 : /* No clobbers */);
14996 return result;
14999 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15000 vrndq_f32 (float32x4_t a)
15002 float32x4_t result;
15003 __asm__ ("frintz %0.4s,%1.4s"
15004 : "=w"(result)
15005 : "w"(a)
15006 : /* No clobbers */);
15007 return result;
15010 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15011 vrndq_f64 (float64x2_t a)
15013 float64x2_t result;
15014 __asm__ ("frintz %0.2d,%1.2d"
15015 : "=w"(result)
15016 : "w"(a)
15017 : /* No clobbers */);
15018 return result;
15021 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15022 vrndqa_f32 (float32x4_t a)
15024 float32x4_t result;
15025 __asm__ ("frinta %0.4s,%1.4s"
15026 : "=w"(result)
15027 : "w"(a)
15028 : /* No clobbers */);
15029 return result;
15032 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15033 vrndqa_f64 (float64x2_t a)
15035 float64x2_t result;
15036 __asm__ ("frinta %0.2d,%1.2d"
15037 : "=w"(result)
15038 : "w"(a)
15039 : /* No clobbers */);
15040 return result;
15043 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15044 vrndqm_f32 (float32x4_t a)
15046 float32x4_t result;
15047 __asm__ ("frintm %0.4s,%1.4s"
15048 : "=w"(result)
15049 : "w"(a)
15050 : /* No clobbers */);
15051 return result;
15054 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15055 vrndqm_f64 (float64x2_t a)
15057 float64x2_t result;
15058 __asm__ ("frintm %0.2d,%1.2d"
15059 : "=w"(result)
15060 : "w"(a)
15061 : /* No clobbers */);
15062 return result;
15065 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15066 vrndqn_f32 (float32x4_t a)
15068 float32x4_t result;
15069 __asm__ ("frintn %0.4s,%1.4s"
15070 : "=w"(result)
15071 : "w"(a)
15072 : /* No clobbers */);
15073 return result;
15076 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15077 vrndqn_f64 (float64x2_t a)
15079 float64x2_t result;
15080 __asm__ ("frintn %0.2d,%1.2d"
15081 : "=w"(result)
15082 : "w"(a)
15083 : /* No clobbers */);
15084 return result;
15087 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15088 vrndqp_f32 (float32x4_t a)
15090 float32x4_t result;
15091 __asm__ ("frintp %0.4s,%1.4s"
15092 : "=w"(result)
15093 : "w"(a)
15094 : /* No clobbers */);
15095 return result;
15098 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15099 vrndqp_f64 (float64x2_t a)
15101 float64x2_t result;
15102 __asm__ ("frintp %0.2d,%1.2d"
15103 : "=w"(result)
15104 : "w"(a)
15105 : /* No clobbers */);
15106 return result;
/* vRSHRN_HIGH_N: rounding shift right narrow, writing the narrowed
   result into the high half of a quad vector whose low half is `a'.
   Macros because the shift count must be a compile-time immediate.  */

#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vRSHRN_N: rounding shift right narrow by an immediate, producing a
   64-bit vector of half-width elements.  */

#define vrshrn_n_s16(a, b)                      \
  __extension__                                 \
    ({                                          \
       int16x8_t a_ = (a);                      \
       int8x8_t result;                         \
       __asm__ ("rshrn %0.8b,%1.8h,%2"          \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vrshrn_n_s32(a, b)                      \
  __extension__                                 \
    ({                                          \
       int32x4_t a_ = (a);                      \
       int16x4_t result;                        \
       __asm__ ("rshrn %0.4h,%1.4s,%2"          \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vrshrn_n_s64(a, b)                      \
  __extension__                                 \
    ({                                          \
       int64x2_t a_ = (a);                      \
       int32x2_t result;                        \
       __asm__ ("rshrn %0.2s,%1.2d,%2"          \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vrshrn_n_u16(a, b)                      \
  __extension__                                 \
    ({                                          \
       uint16x8_t a_ = (a);                     \
       uint8x8_t result;                        \
       __asm__ ("rshrn %0.8b,%1.8h,%2"          \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vrshrn_n_u32(a, b)                      \
  __extension__                                 \
    ({                                          \
       uint32x4_t a_ = (a);                     \
       uint16x4_t result;                       \
       __asm__ ("rshrn %0.4h,%1.4s,%2"          \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vrshrn_n_u64(a, b)                      \
  __extension__                                 \
    ({                                          \
       uint64x2_t a_ = (a);                     \
       uint32x2_t result;                       \
       __asm__ ("rshrn %0.2s,%1.2d,%2"          \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })
15265 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15266 vrsqrte_f32 (float32x2_t a)
15268 float32x2_t result;
15269 __asm__ ("frsqrte %0.2s,%1.2s"
15270 : "=w"(result)
15271 : "w"(a)
15272 : /* No clobbers */);
15273 return result;
15276 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15277 vrsqrte_f64 (float64x2_t a)
15279 float64x2_t result;
15280 __asm__ ("frsqrte %0.2d,%1.2d"
15281 : "=w"(result)
15282 : "w"(a)
15283 : /* No clobbers */);
15284 return result;
15287 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15288 vrsqrte_u32 (uint32x2_t a)
15290 uint32x2_t result;
15291 __asm__ ("ursqrte %0.2s,%1.2s"
15292 : "=w"(result)
15293 : "w"(a)
15294 : /* No clobbers */);
15295 return result;
15298 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15299 vrsqrted_f64 (float64_t a)
15301 float64_t result;
15302 __asm__ ("frsqrte %d0,%d1"
15303 : "=w"(result)
15304 : "w"(a)
15305 : /* No clobbers */);
15306 return result;
15309 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15310 vrsqrteq_f32 (float32x4_t a)
15312 float32x4_t result;
15313 __asm__ ("frsqrte %0.4s,%1.4s"
15314 : "=w"(result)
15315 : "w"(a)
15316 : /* No clobbers */);
15317 return result;
15320 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15321 vrsqrteq_f64 (float64x2_t a)
15323 float64x2_t result;
15324 __asm__ ("frsqrte %0.2d,%1.2d"
15325 : "=w"(result)
15326 : "w"(a)
15327 : /* No clobbers */);
15328 return result;
15331 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15332 vrsqrteq_u32 (uint32x4_t a)
15334 uint32x4_t result;
15335 __asm__ ("ursqrte %0.4s,%1.4s"
15336 : "=w"(result)
15337 : "w"(a)
15338 : /* No clobbers */);
15339 return result;
15342 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15343 vrsqrtes_f32 (float32_t a)
15345 float32_t result;
15346 __asm__ ("frsqrte %s0,%s1"
15347 : "=w"(result)
15348 : "w"(a)
15349 : /* No clobbers */);
15350 return result;
15353 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15354 vrsqrts_f32 (float32x2_t a, float32x2_t b)
15356 float32x2_t result;
15357 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
15358 : "=w"(result)
15359 : "w"(a), "w"(b)
15360 : /* No clobbers */);
15361 return result;
15364 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15365 vrsqrtsd_f64 (float64_t a, float64_t b)
15367 float64_t result;
15368 __asm__ ("frsqrts %d0,%d1,%d2"
15369 : "=w"(result)
15370 : "w"(a), "w"(b)
15371 : /* No clobbers */);
15372 return result;
15375 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15376 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
15378 float32x4_t result;
15379 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
15380 : "=w"(result)
15381 : "w"(a), "w"(b)
15382 : /* No clobbers */);
15383 return result;
15386 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15387 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
15389 float64x2_t result;
15390 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
15391 : "=w"(result)
15392 : "w"(a), "w"(b)
15393 : /* No clobbers */);
15394 return result;
15397 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15398 vrsqrtss_f32 (float32_t a, float32_t b)
15400 float32_t result;
15401 __asm__ ("frsqrts %s0,%s1,%s2"
15402 : "=w"(result)
15403 : "w"(a), "w"(b)
15404 : /* No clobbers */);
15405 return result;
15408 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15409 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
15411 float64x2_t result;
15412 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
15413 : "=w"(result)
15414 : "w"(a), "w"(b)
15415 : /* No clobbers */);
15416 return result;
15419 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15420 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
15422 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
15423 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
15424 : "+w"(result)
15425 : "w"(b), "w"(c)
15426 : /* No clobbers */);
15427 return result;
15430 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15431 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
15433 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
15434 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
15435 : "+w"(result)
15436 : "w"(b), "w"(c)
15437 : /* No clobbers */);
15438 return result;
15441 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15442 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
15444 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
15445 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
15446 : "+w"(result)
15447 : "w"(b), "w"(c)
15448 : /* No clobbers */);
15449 return result;
15452 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15453 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
15455 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
15456 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
15457 : "+w"(result)
15458 : "w"(b), "w"(c)
15459 : /* No clobbers */);
15460 return result;
15463 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15464 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
15466 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
15467 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
15468 : "+w"(result)
15469 : "w"(b), "w"(c)
15470 : /* No clobbers */);
15471 return result;
15474 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15475 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
15477 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
15478 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
15479 : "+w"(result)
15480 : "w"(b), "w"(c)
15481 : /* No clobbers */);
15482 return result;
15485 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15486 vrsubhn_s16 (int16x8_t a, int16x8_t b)
15488 int8x8_t result;
15489 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
15490 : "=w"(result)
15491 : "w"(a), "w"(b)
15492 : /* No clobbers */);
15493 return result;
15496 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15497 vrsubhn_s32 (int32x4_t a, int32x4_t b)
15499 int16x4_t result;
15500 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
15501 : "=w"(result)
15502 : "w"(a), "w"(b)
15503 : /* No clobbers */);
15504 return result;
15507 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15508 vrsubhn_s64 (int64x2_t a, int64x2_t b)
15510 int32x2_t result;
15511 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
15512 : "=w"(result)
15513 : "w"(a), "w"(b)
15514 : /* No clobbers */);
15515 return result;
15518 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15519 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
15521 uint8x8_t result;
15522 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
15523 : "=w"(result)
15524 : "w"(a), "w"(b)
15525 : /* No clobbers */);
15526 return result;
15529 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15530 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
15532 uint16x4_t result;
15533 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
15534 : "=w"(result)
15535 : "w"(a), "w"(b)
15536 : /* No clobbers */);
15537 return result;
15540 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15541 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
15543 uint32x2_t result;
15544 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
15545 : "=w"(result)
15546 : "w"(a), "w"(b)
15547 : /* No clobbers */);
15548 return result;
/* vSET_LANE (64-bit vectors): insert scalar `a' into lane `c' of
   vector `b' via INS.  Macros because the lane index must be a
   compile-time immediate; the "0"(b_) constraint ties the input vector
   to the output register so untouched lanes are preserved.  */

#define vset_lane_f32(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       float32x2_t b_ = (b);                    \
       float32_t a_ = (a);                      \
       float32x2_t result;                      \
       __asm__ ("ins %0.s[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_f64(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       float64x1_t b_ = (b);                    \
       float64_t a_ = (a);                      \
       float64x1_t result;                      \
       __asm__ ("ins %0.d[%3], %x1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_p8(a, b, c)                   \
  __extension__                                 \
    ({                                          \
       poly8x8_t b_ = (b);                      \
       poly8_t a_ = (a);                        \
       poly8x8_t result;                        \
       __asm__ ("ins %0.b[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_p16(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       poly16x4_t b_ = (b);                     \
       poly16_t a_ = (a);                       \
       poly16x4_t result;                       \
       __asm__ ("ins %0.h[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_s8(a, b, c)                   \
  __extension__                                 \
    ({                                          \
       int8x8_t b_ = (b);                       \
       int8_t a_ = (a);                         \
       int8x8_t result;                         \
       __asm__ ("ins %0.b[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_s16(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       int16x4_t b_ = (b);                      \
       int16_t a_ = (a);                        \
       int16x4_t result;                        \
       __asm__ ("ins %0.h[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_s32(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       int32x2_t b_ = (b);                      \
       int32_t a_ = (a);                        \
       int32x2_t result;                        \
       __asm__ ("ins %0.s[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_s64(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       int64x1_t b_ = (b);                      \
       int64_t a_ = (a);                        \
       int64x1_t result;                        \
       __asm__ ("ins %0.d[%3], %x1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_u8(a, b, c)                   \
  __extension__                                 \
    ({                                          \
       uint8x8_t b_ = (b);                      \
       uint8_t a_ = (a);                        \
       uint8x8_t result;                        \
       __asm__ ("ins %0.b[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_u16(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       uint16x4_t b_ = (b);                     \
       uint16_t a_ = (a);                       \
       uint16x4_t result;                       \
       __asm__ ("ins %0.h[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_u32(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       uint32x2_t b_ = (b);                     \
       uint32_t a_ = (a);                       \
       uint32x2_t result;                       \
       __asm__ ("ins %0.s[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vset_lane_u64(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       uint64x1_t b_ = (b);                     \
       uint64_t a_ = (a);                       \
       uint64x1_t result;                       \
       __asm__ ("ins %0.d[%3], %x1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })
/* vSETQ_LANE (128-bit vectors): insert scalar `a' into lane `c' of
   quad vector `b' via INS (see the 64-bit variants above).  */

#define vsetq_lane_f32(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       float32x4_t b_ = (b);                    \
       float32_t a_ = (a);                      \
       float32x4_t result;                      \
       __asm__ ("ins %0.s[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_f64(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       float64x2_t b_ = (b);                    \
       float64_t a_ = (a);                      \
       float64x2_t result;                      \
       __asm__ ("ins %0.d[%3], %x1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_p8(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       poly8x16_t b_ = (b);                     \
       poly8_t a_ = (a);                        \
       poly8x16_t result;                       \
       __asm__ ("ins %0.b[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_p16(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       poly16x8_t b_ = (b);                     \
       poly16_t a_ = (a);                       \
       poly16x8_t result;                       \
       __asm__ ("ins %0.h[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_s8(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       int8x16_t b_ = (b);                      \
       int8_t a_ = (a);                         \
       int8x16_t result;                        \
       __asm__ ("ins %0.b[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_s16(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       int16x8_t b_ = (b);                      \
       int16_t a_ = (a);                        \
       int16x8_t result;                        \
       __asm__ ("ins %0.h[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_s32(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       int32x4_t b_ = (b);                      \
       int32_t a_ = (a);                        \
       int32x4_t result;                        \
       __asm__ ("ins %0.s[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_s64(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       int64x2_t b_ = (b);                      \
       int64_t a_ = (a);                        \
       int64x2_t result;                        \
       __asm__ ("ins %0.d[%3], %x1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_u8(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       uint8x16_t b_ = (b);                     \
       uint8_t a_ = (a);                        \
       uint8x16_t result;                       \
       __asm__ ("ins %0.b[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_u16(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       uint16x8_t b_ = (b);                     \
       uint16_t a_ = (a);                       \
       uint16x8_t result;                       \
       __asm__ ("ins %0.h[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_u32(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       uint32x4_t b_ = (b);                     \
       uint32_t a_ = (a);                       \
       uint32x4_t result;                       \
       __asm__ ("ins %0.s[%3], %w1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsetq_lane_u64(a, b, c)                 \
  __extension__                                 \
    ({                                          \
       uint64x2_t b_ = (b);                     \
       uint64_t a_ = (a);                       \
       uint64x2_t result;                       \
       __asm__ ("ins %0.d[%3], %x1"             \
                : "=w"(result)                  \
                : "r"(a_), "0"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })
/* vSHRN_HIGH_N: shift right narrow (truncating), writing the narrowed
   result into the high half of a quad vector whose low half is `a'.  */

#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vSHRN_N: shift right narrow (truncating) by an immediate.  */

#define vshrn_n_s16(a, b)                       \
  __extension__                                 \
    ({                                          \
       int16x8_t a_ = (a);                      \
       int8x8_t result;                         \
       __asm__ ("shrn %0.8b,%1.8h,%2"           \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vshrn_n_s32(a, b)                       \
  __extension__                                 \
    ({                                          \
       int32x4_t a_ = (a);                      \
       int16x4_t result;                        \
       __asm__ ("shrn %0.4h,%1.4s,%2"           \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vshrn_n_s64(a, b)                       \
  __extension__                                 \
    ({                                          \
       int64x2_t a_ = (a);                      \
       int32x2_t result;                        \
       __asm__ ("shrn %0.2s,%1.2d,%2"           \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vshrn_n_u16(a, b)                       \
  __extension__                                 \
    ({                                          \
       uint16x8_t a_ = (a);                     \
       uint8x8_t result;                        \
       __asm__ ("shrn %0.8b,%1.8h,%2"           \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vshrn_n_u32(a, b)                       \
  __extension__                                 \
    ({                                          \
       uint32x4_t a_ = (a);                     \
       uint16x4_t result;                       \
       __asm__ ("shrn %0.4h,%1.4s,%2"           \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vshrn_n_u64(a, b)                       \
  __extension__                                 \
    ({                                          \
       uint64x2_t a_ = (a);                     \
       uint32x2_t result;                       \
       __asm__ ("shrn %0.2s,%1.2d,%2"           \
                : "=w"(result)                  \
                : "w"(a_), "i"(b)               \
                : /* No clobbers */);           \
       result;                                  \
     })
/* vSLI_N (poly types): shift left by an immediate and insert, keeping
   the low bits of the destination (SLI instruction).  */

#define vsli_n_p8(a, b, c)                      \
  __extension__                                 \
    ({                                          \
       poly8x8_t b_ = (b);                      \
       poly8x8_t a_ = (a);                      \
       poly8x8_t result;                        \
       __asm__ ("sli %0.8b,%2.8b,%3"            \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsli_n_p16(a, b, c)                     \
  __extension__                                 \
    ({                                          \
       poly16x4_t b_ = (b);                     \
       poly16x4_t a_ = (a);                     \
       poly16x4_t result;                       \
       __asm__ ("sli %0.4h,%2.4h,%3"            \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsliq_n_p8(a, b, c)                     \
  __extension__                                 \
    ({                                          \
       poly8x16_t b_ = (b);                     \
       poly8x16_t a_ = (a);                     \
       poly8x16_t result;                       \
       __asm__ ("sli %0.16b,%2.16b,%3"          \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsliq_n_p16(a, b, c)                    \
  __extension__                                 \
    ({                                          \
       poly16x8_t b_ = (b);                     \
       poly16x8_t a_ = (a);                     \
       poly16x8_t result;                       \
       __asm__ ("sli %0.8h,%2.8h,%3"            \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })
/* vSRI_N (poly types): shift right by an immediate and insert, keeping
   the high bits of the destination (SRI instruction).  */

#define vsri_n_p8(a, b, c)                      \
  __extension__                                 \
    ({                                          \
       poly8x8_t b_ = (b);                      \
       poly8x8_t a_ = (a);                      \
       poly8x8_t result;                        \
       __asm__ ("sri %0.8b,%2.8b,%3"            \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsri_n_p16(a, b, c)                     \
  __extension__                                 \
    ({                                          \
       poly16x4_t b_ = (b);                     \
       poly16x4_t a_ = (a);                     \
       poly16x4_t result;                       \
       __asm__ ("sri %0.4h,%2.4h,%3"            \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsriq_n_p8(a, b, c)                     \
  __extension__                                 \
    ({                                          \
       poly8x16_t b_ = (b);                     \
       poly8x16_t a_ = (a);                     \
       poly8x16_t result;                       \
       __asm__ ("sri %0.16b,%2.16b,%3"          \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })

#define vsriq_n_p16(a, b, c)                    \
  __extension__                                 \
    ({                                          \
       poly16x8_t b_ = (b);                     \
       poly16x8_t a_ = (a);                     \
       poly16x8_t result;                       \
       __asm__ ("sri %0.8h,%2.8h,%3"            \
                : "=w"(result)                  \
                : "0"(a_), "w"(b_), "i"(c)      \
                : /* No clobbers */);           \
       result;                                  \
     })
16123 __extension__ static __inline void __attribute__ ((__always_inline__))
16124 vst1_f32 (float32_t * a, float32x2_t b)
16126 __asm__ ("st1 {%1.2s},[%0]"
16128 : "r"(a), "w"(b)
16129 : "memory");
16132 __extension__ static __inline void __attribute__ ((__always_inline__))
16133 vst1_f64 (float64_t * a, float64x1_t b)
16135 __asm__ ("st1 {%1.1d},[%0]"
16137 : "r"(a), "w"(b)
16138 : "memory");
/* vST1_LANE: store one lane of a vector to memory.  Macros because the
   lane index must be a compile-time immediate; each declares a
   "memory" clobber because it writes through the pointer.  */

#define vst1_lane_f32(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       float32x2_t b_ = (b);                    \
       float32_t * a_ = (a);                    \
       __asm__ ("st1 {%1.s}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_f64(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       float64x1_t b_ = (b);                    \
       float64_t * a_ = (a);                    \
       __asm__ ("st1 {%1.d}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_p8(a, b, c)                   \
  __extension__                                 \
    ({                                          \
       poly8x8_t b_ = (b);                      \
       poly8_t * a_ = (a);                      \
       __asm__ ("st1 {%1.b}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_p16(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       poly16x4_t b_ = (b);                     \
       poly16_t * a_ = (a);                     \
       __asm__ ("st1 {%1.h}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_s8(a, b, c)                   \
  __extension__                                 \
    ({                                          \
       int8x8_t b_ = (b);                       \
       int8_t * a_ = (a);                       \
       __asm__ ("st1 {%1.b}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_s16(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       int16x4_t b_ = (b);                      \
       int16_t * a_ = (a);                      \
       __asm__ ("st1 {%1.h}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_s32(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       int32x2_t b_ = (b);                      \
       int32_t * a_ = (a);                      \
       __asm__ ("st1 {%1.s}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_s64(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       int64x1_t b_ = (b);                      \
       int64_t * a_ = (a);                      \
       __asm__ ("st1 {%1.d}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_u8(a, b, c)                   \
  __extension__                                 \
    ({                                          \
       uint8x8_t b_ = (b);                      \
       uint8_t * a_ = (a);                      \
       __asm__ ("st1 {%1.b}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_u16(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       uint16x4_t b_ = (b);                     \
       uint16_t * a_ = (a);                     \
       __asm__ ("st1 {%1.h}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_u32(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       uint32x2_t b_ = (b);                     \
       uint32_t * a_ = (a);                     \
       __asm__ ("st1 {%1.s}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })

#define vst1_lane_u64(a, b, c)                  \
  __extension__                                 \
    ({                                          \
       uint64x1_t b_ = (b);                     \
       uint64_t * a_ = (a);                     \
       __asm__ ("st1 {%1.d}[%2],[%0]"           \
                :                               \
                : "r"(a_), "w"(b_), "i"(c)      \
                : "memory");                    \
     })
16273 __extension__ static __inline void __attribute__ ((__always_inline__))
16274 vst1_p8 (poly8_t * a, poly8x8_t b)
16276 __asm__ ("st1 {%1.8b},[%0]"
16278 : "r"(a), "w"(b)
16279 : "memory");
16282 __extension__ static __inline void __attribute__ ((__always_inline__))
16283 vst1_p16 (poly16_t * a, poly16x4_t b)
16285 __asm__ ("st1 {%1.4h},[%0]"
16287 : "r"(a), "w"(b)
16288 : "memory");
16291 __extension__ static __inline void __attribute__ ((__always_inline__))
16292 vst1_s8 (int8_t * a, int8x8_t b)
16294 __asm__ ("st1 {%1.8b},[%0]"
16296 : "r"(a), "w"(b)
16297 : "memory");
16300 __extension__ static __inline void __attribute__ ((__always_inline__))
16301 vst1_s16 (int16_t * a, int16x4_t b)
16303 __asm__ ("st1 {%1.4h},[%0]"
16305 : "r"(a), "w"(b)
16306 : "memory");
16309 __extension__ static __inline void __attribute__ ((__always_inline__))
16310 vst1_s32 (int32_t * a, int32x2_t b)
16312 __asm__ ("st1 {%1.2s},[%0]"
16314 : "r"(a), "w"(b)
16315 : "memory");
16318 __extension__ static __inline void __attribute__ ((__always_inline__))
16319 vst1_s64 (int64_t * a, int64x1_t b)
16321 __asm__ ("st1 {%1.1d},[%0]"
16323 : "r"(a), "w"(b)
16324 : "memory");
16327 __extension__ static __inline void __attribute__ ((__always_inline__))
16328 vst1_u8 (uint8_t * a, uint8x8_t b)
16330 __asm__ ("st1 {%1.8b},[%0]"
16332 : "r"(a), "w"(b)
16333 : "memory");
16336 __extension__ static __inline void __attribute__ ((__always_inline__))
16337 vst1_u16 (uint16_t * a, uint16x4_t b)
16339 __asm__ ("st1 {%1.4h},[%0]"
16341 : "r"(a), "w"(b)
16342 : "memory");
16345 __extension__ static __inline void __attribute__ ((__always_inline__))
16346 vst1_u32 (uint32_t * a, uint32x2_t b)
16348 __asm__ ("st1 {%1.2s},[%0]"
16350 : "r"(a), "w"(b)
16351 : "memory");
16354 __extension__ static __inline void __attribute__ ((__always_inline__))
16355 vst1_u64 (uint64_t * a, uint64x1_t b)
16357 __asm__ ("st1 {%1.1d},[%0]"
16359 : "r"(a), "w"(b)
16360 : "memory");
16363 __extension__ static __inline void __attribute__ ((__always_inline__))
16364 vst1q_f32 (float32_t * a, float32x4_t b)
16366 __asm__ ("st1 {%1.4s},[%0]"
16368 : "r"(a), "w"(b)
16369 : "memory");
16372 __extension__ static __inline void __attribute__ ((__always_inline__))
16373 vst1q_f64 (float64_t * a, float64x2_t b)
16375 __asm__ ("st1 {%1.2d},[%0]"
16377 : "r"(a), "w"(b)
16378 : "memory");
/* vst1q_lane_<type> (A, B, C): store lane C of the 128-bit vector B to
   the address A.  Macros rather than functions so the lane number C is
   a compile-time immediate for the "i" constraint.  */

#define vst1q_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t * a_ = (a); \
       __asm__ ("st1 {%1.b}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t * a_ = (a); \
       __asm__ ("st1 {%1.h}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t * a_ = (a); \
       __asm__ ("st1 {%1.s}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })

#define vst1q_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t * a_ = (a); \
       __asm__ ("st1 {%1.d}[%2],[%0]" \
                : \
                : "r"(a_), "w"(b_), "i"(c) \
                : "memory"); \
     })
16513 __extension__ static __inline void __attribute__ ((__always_inline__))
16514 vst1q_p8 (poly8_t * a, poly8x16_t b)
16516 __asm__ ("st1 {%1.16b},[%0]"
16518 : "r"(a), "w"(b)
16519 : "memory");
16522 __extension__ static __inline void __attribute__ ((__always_inline__))
16523 vst1q_p16 (poly16_t * a, poly16x8_t b)
16525 __asm__ ("st1 {%1.8h},[%0]"
16527 : "r"(a), "w"(b)
16528 : "memory");
16531 __extension__ static __inline void __attribute__ ((__always_inline__))
16532 vst1q_s8 (int8_t * a, int8x16_t b)
16534 __asm__ ("st1 {%1.16b},[%0]"
16536 : "r"(a), "w"(b)
16537 : "memory");
16540 __extension__ static __inline void __attribute__ ((__always_inline__))
16541 vst1q_s16 (int16_t * a, int16x8_t b)
16543 __asm__ ("st1 {%1.8h},[%0]"
16545 : "r"(a), "w"(b)
16546 : "memory");
16549 __extension__ static __inline void __attribute__ ((__always_inline__))
16550 vst1q_s32 (int32_t * a, int32x4_t b)
16552 __asm__ ("st1 {%1.4s},[%0]"
16554 : "r"(a), "w"(b)
16555 : "memory");
16558 __extension__ static __inline void __attribute__ ((__always_inline__))
16559 vst1q_s64 (int64_t * a, int64x2_t b)
16561 __asm__ ("st1 {%1.2d},[%0]"
16563 : "r"(a), "w"(b)
16564 : "memory");
16567 __extension__ static __inline void __attribute__ ((__always_inline__))
16568 vst1q_u8 (uint8_t * a, uint8x16_t b)
16570 __asm__ ("st1 {%1.16b},[%0]"
16572 : "r"(a), "w"(b)
16573 : "memory");
16576 __extension__ static __inline void __attribute__ ((__always_inline__))
16577 vst1q_u16 (uint16_t * a, uint16x8_t b)
16579 __asm__ ("st1 {%1.8h},[%0]"
16581 : "r"(a), "w"(b)
16582 : "memory");
16585 __extension__ static __inline void __attribute__ ((__always_inline__))
16586 vst1q_u32 (uint32_t * a, uint32x4_t b)
16588 __asm__ ("st1 {%1.4s},[%0]"
16590 : "r"(a), "w"(b)
16591 : "memory");
16594 __extension__ static __inline void __attribute__ ((__always_inline__))
16595 vst1q_u64 (uint64_t * a, uint64x2_t b)
16597 __asm__ ("st1 {%1.2d},[%0]"
16599 : "r"(a), "w"(b)
16600 : "memory");
16603 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16604 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
16606 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
16607 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
16608 : "+w"(result)
16609 : "w"(b), "w"(c)
16610 : /* No clobbers */);
16611 return result;
16614 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16615 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
16617 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
16618 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
16619 : "+w"(result)
16620 : "w"(b), "w"(c)
16621 : /* No clobbers */);
16622 return result;
16625 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16626 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
16628 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
16629 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
16630 : "+w"(result)
16631 : "w"(b), "w"(c)
16632 : /* No clobbers */);
16633 return result;
16636 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16637 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
16639 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
16640 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
16641 : "+w"(result)
16642 : "w"(b), "w"(c)
16643 : /* No clobbers */);
16644 return result;
16647 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16648 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
16650 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
16651 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
16652 : "+w"(result)
16653 : "w"(b), "w"(c)
16654 : /* No clobbers */);
16655 return result;
16658 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16659 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
16661 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
16662 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
16663 : "+w"(result)
16664 : "w"(b), "w"(c)
16665 : /* No clobbers */);
16666 return result;
16669 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16670 vsubhn_s16 (int16x8_t a, int16x8_t b)
16672 int8x8_t result;
16673 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
16674 : "=w"(result)
16675 : "w"(a), "w"(b)
16676 : /* No clobbers */);
16677 return result;
16680 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16681 vsubhn_s32 (int32x4_t a, int32x4_t b)
16683 int16x4_t result;
16684 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
16685 : "=w"(result)
16686 : "w"(a), "w"(b)
16687 : /* No clobbers */);
16688 return result;
16691 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16692 vsubhn_s64 (int64x2_t a, int64x2_t b)
16694 int32x2_t result;
16695 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
16696 : "=w"(result)
16697 : "w"(a), "w"(b)
16698 : /* No clobbers */);
16699 return result;
16702 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16703 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
16705 uint8x8_t result;
16706 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
16707 : "=w"(result)
16708 : "w"(a), "w"(b)
16709 : /* No clobbers */);
16710 return result;
16713 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16714 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
16716 uint16x4_t result;
16717 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
16718 : "=w"(result)
16719 : "w"(a), "w"(b)
16720 : /* No clobbers */);
16721 return result;
16724 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16725 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
16727 uint32x2_t result;
16728 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
16729 : "=w"(result)
16730 : "w"(a), "w"(b)
16731 : /* No clobbers */);
16732 return result;
16735 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16736 vtrn1_f32 (float32x2_t a, float32x2_t b)
16738 float32x2_t result;
16739 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
16740 : "=w"(result)
16741 : "w"(a), "w"(b)
16742 : /* No clobbers */);
16743 return result;
16746 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16747 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
16749 poly8x8_t result;
16750 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
16751 : "=w"(result)
16752 : "w"(a), "w"(b)
16753 : /* No clobbers */);
16754 return result;
16757 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16758 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
16760 poly16x4_t result;
16761 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
16762 : "=w"(result)
16763 : "w"(a), "w"(b)
16764 : /* No clobbers */);
16765 return result;
16768 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16769 vtrn1_s8 (int8x8_t a, int8x8_t b)
16771 int8x8_t result;
16772 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
16773 : "=w"(result)
16774 : "w"(a), "w"(b)
16775 : /* No clobbers */);
16776 return result;
16779 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16780 vtrn1_s16 (int16x4_t a, int16x4_t b)
16782 int16x4_t result;
16783 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
16784 : "=w"(result)
16785 : "w"(a), "w"(b)
16786 : /* No clobbers */);
16787 return result;
16790 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16791 vtrn1_s32 (int32x2_t a, int32x2_t b)
16793 int32x2_t result;
16794 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
16795 : "=w"(result)
16796 : "w"(a), "w"(b)
16797 : /* No clobbers */);
16798 return result;
16801 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16802 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
16804 uint8x8_t result;
16805 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
16806 : "=w"(result)
16807 : "w"(a), "w"(b)
16808 : /* No clobbers */);
16809 return result;
16812 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16813 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
16815 uint16x4_t result;
16816 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
16817 : "=w"(result)
16818 : "w"(a), "w"(b)
16819 : /* No clobbers */);
16820 return result;
16823 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16824 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
16826 uint32x2_t result;
16827 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
16828 : "=w"(result)
16829 : "w"(a), "w"(b)
16830 : /* No clobbers */);
16831 return result;
16834 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16835 vtrn1q_f32 (float32x4_t a, float32x4_t b)
16837 float32x4_t result;
16838 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
16839 : "=w"(result)
16840 : "w"(a), "w"(b)
16841 : /* No clobbers */);
16842 return result;
16845 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16846 vtrn1q_f64 (float64x2_t a, float64x2_t b)
16848 float64x2_t result;
16849 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
16850 : "=w"(result)
16851 : "w"(a), "w"(b)
16852 : /* No clobbers */);
16853 return result;
16856 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16857 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
16859 poly8x16_t result;
16860 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
16861 : "=w"(result)
16862 : "w"(a), "w"(b)
16863 : /* No clobbers */);
16864 return result;
16867 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16868 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
16870 poly16x8_t result;
16871 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
16872 : "=w"(result)
16873 : "w"(a), "w"(b)
16874 : /* No clobbers */);
16875 return result;
16878 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16879 vtrn1q_s8 (int8x16_t a, int8x16_t b)
16881 int8x16_t result;
16882 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
16883 : "=w"(result)
16884 : "w"(a), "w"(b)
16885 : /* No clobbers */);
16886 return result;
16889 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16890 vtrn1q_s16 (int16x8_t a, int16x8_t b)
16892 int16x8_t result;
16893 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
16894 : "=w"(result)
16895 : "w"(a), "w"(b)
16896 : /* No clobbers */);
16897 return result;
16900 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16901 vtrn1q_s32 (int32x4_t a, int32x4_t b)
16903 int32x4_t result;
16904 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
16905 : "=w"(result)
16906 : "w"(a), "w"(b)
16907 : /* No clobbers */);
16908 return result;
16911 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16912 vtrn1q_s64 (int64x2_t a, int64x2_t b)
16914 int64x2_t result;
16915 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
16916 : "=w"(result)
16917 : "w"(a), "w"(b)
16918 : /* No clobbers */);
16919 return result;
16922 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16923 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
16925 uint8x16_t result;
16926 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
16927 : "=w"(result)
16928 : "w"(a), "w"(b)
16929 : /* No clobbers */);
16930 return result;
16933 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16934 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
16936 uint16x8_t result;
16937 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
16938 : "=w"(result)
16939 : "w"(a), "w"(b)
16940 : /* No clobbers */);
16941 return result;
16944 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16945 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
16947 uint32x4_t result;
16948 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
16949 : "=w"(result)
16950 : "w"(a), "w"(b)
16951 : /* No clobbers */);
16952 return result;
16955 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16956 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
16958 uint64x2_t result;
16959 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
16960 : "=w"(result)
16961 : "w"(a), "w"(b)
16962 : /* No clobbers */);
16963 return result;
16966 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16967 vtrn2_f32 (float32x2_t a, float32x2_t b)
16969 float32x2_t result;
16970 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
16971 : "=w"(result)
16972 : "w"(a), "w"(b)
16973 : /* No clobbers */);
16974 return result;
16977 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16978 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
16980 poly8x8_t result;
16981 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
16982 : "=w"(result)
16983 : "w"(a), "w"(b)
16984 : /* No clobbers */);
16985 return result;
16988 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16989 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
16991 poly16x4_t result;
16992 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
16993 : "=w"(result)
16994 : "w"(a), "w"(b)
16995 : /* No clobbers */);
16996 return result;
16999 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17000 vtrn2_s8 (int8x8_t a, int8x8_t b)
17002 int8x8_t result;
17003 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
17004 : "=w"(result)
17005 : "w"(a), "w"(b)
17006 : /* No clobbers */);
17007 return result;
17010 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17011 vtrn2_s16 (int16x4_t a, int16x4_t b)
17013 int16x4_t result;
17014 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
17015 : "=w"(result)
17016 : "w"(a), "w"(b)
17017 : /* No clobbers */);
17018 return result;
17021 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17022 vtrn2_s32 (int32x2_t a, int32x2_t b)
17024 int32x2_t result;
17025 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
17026 : "=w"(result)
17027 : "w"(a), "w"(b)
17028 : /* No clobbers */);
17029 return result;
17032 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17033 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
17035 uint8x8_t result;
17036 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
17037 : "=w"(result)
17038 : "w"(a), "w"(b)
17039 : /* No clobbers */);
17040 return result;
17043 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17044 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
17046 uint16x4_t result;
17047 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
17048 : "=w"(result)
17049 : "w"(a), "w"(b)
17050 : /* No clobbers */);
17051 return result;
17054 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17055 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
17057 uint32x2_t result;
17058 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
17059 : "=w"(result)
17060 : "w"(a), "w"(b)
17061 : /* No clobbers */);
17062 return result;
17065 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17066 vtrn2q_f32 (float32x4_t a, float32x4_t b)
17068 float32x4_t result;
17069 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17070 : "=w"(result)
17071 : "w"(a), "w"(b)
17072 : /* No clobbers */);
17073 return result;
17076 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17077 vtrn2q_f64 (float64x2_t a, float64x2_t b)
17079 float64x2_t result;
17080 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17081 : "=w"(result)
17082 : "w"(a), "w"(b)
17083 : /* No clobbers */);
17084 return result;
17087 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17088 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
17090 poly8x16_t result;
17091 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17092 : "=w"(result)
17093 : "w"(a), "w"(b)
17094 : /* No clobbers */);
17095 return result;
17098 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17099 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
17101 poly16x8_t result;
17102 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17103 : "=w"(result)
17104 : "w"(a), "w"(b)
17105 : /* No clobbers */);
17106 return result;
17109 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17110 vtrn2q_s8 (int8x16_t a, int8x16_t b)
17112 int8x16_t result;
17113 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17114 : "=w"(result)
17115 : "w"(a), "w"(b)
17116 : /* No clobbers */);
17117 return result;
17120 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17121 vtrn2q_s16 (int16x8_t a, int16x8_t b)
17123 int16x8_t result;
17124 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17125 : "=w"(result)
17126 : "w"(a), "w"(b)
17127 : /* No clobbers */);
17128 return result;
17131 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17132 vtrn2q_s32 (int32x4_t a, int32x4_t b)
17134 int32x4_t result;
17135 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17136 : "=w"(result)
17137 : "w"(a), "w"(b)
17138 : /* No clobbers */);
17139 return result;
17142 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17143 vtrn2q_s64 (int64x2_t a, int64x2_t b)
17145 int64x2_t result;
17146 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17147 : "=w"(result)
17148 : "w"(a), "w"(b)
17149 : /* No clobbers */);
17150 return result;
17153 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17154 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
17156 uint8x16_t result;
17157 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
17158 : "=w"(result)
17159 : "w"(a), "w"(b)
17160 : /* No clobbers */);
17161 return result;
17164 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17165 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
17167 uint16x8_t result;
17168 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
17169 : "=w"(result)
17170 : "w"(a), "w"(b)
17171 : /* No clobbers */);
17172 return result;
17175 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17176 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
17178 uint32x4_t result;
17179 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
17180 : "=w"(result)
17181 : "w"(a), "w"(b)
17182 : /* No clobbers */);
17183 return result;
17186 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17187 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
17189 uint64x2_t result;
17190 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
17191 : "=w"(result)
17192 : "w"(a), "w"(b)
17193 : /* No clobbers */);
17194 return result;
17197 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17198 vtst_p8 (poly8x8_t a, poly8x8_t b)
17200 uint8x8_t result;
17201 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
17202 : "=w"(result)
17203 : "w"(a), "w"(b)
17204 : /* No clobbers */);
17205 return result;
17208 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17209 vtst_p16 (poly16x4_t a, poly16x4_t b)
17211 uint16x4_t result;
17212 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
17213 : "=w"(result)
17214 : "w"(a), "w"(b)
17215 : /* No clobbers */);
17216 return result;
17219 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17220 vtstq_p8 (poly8x16_t a, poly8x16_t b)
17222 uint8x16_t result;
17223 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
17224 : "=w"(result)
17225 : "w"(a), "w"(b)
17226 : /* No clobbers */);
17227 return result;
17230 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17231 vtstq_p16 (poly16x8_t a, poly16x8_t b)
17233 uint16x8_t result;
17234 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
17235 : "=w"(result)
17236 : "w"(a), "w"(b)
17237 : /* No clobbers */);
17238 return result;
17240 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17241 vuzp1_f32 (float32x2_t a, float32x2_t b)
17243 float32x2_t result;
17244 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17245 : "=w"(result)
17246 : "w"(a), "w"(b)
17247 : /* No clobbers */);
17248 return result;
17251 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17252 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
17254 poly8x8_t result;
17255 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17256 : "=w"(result)
17257 : "w"(a), "w"(b)
17258 : /* No clobbers */);
17259 return result;
17262 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17263 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
17265 poly16x4_t result;
17266 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17267 : "=w"(result)
17268 : "w"(a), "w"(b)
17269 : /* No clobbers */);
17270 return result;
17273 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17274 vuzp1_s8 (int8x8_t a, int8x8_t b)
17276 int8x8_t result;
17277 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17278 : "=w"(result)
17279 : "w"(a), "w"(b)
17280 : /* No clobbers */);
17281 return result;
17284 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17285 vuzp1_s16 (int16x4_t a, int16x4_t b)
17287 int16x4_t result;
17288 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17289 : "=w"(result)
17290 : "w"(a), "w"(b)
17291 : /* No clobbers */);
17292 return result;
17295 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17296 vuzp1_s32 (int32x2_t a, int32x2_t b)
17298 int32x2_t result;
17299 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17300 : "=w"(result)
17301 : "w"(a), "w"(b)
17302 : /* No clobbers */);
17303 return result;
17306 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17307 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
17309 uint8x8_t result;
17310 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
17311 : "=w"(result)
17312 : "w"(a), "w"(b)
17313 : /* No clobbers */);
17314 return result;
17317 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17318 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
17320 uint16x4_t result;
17321 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
17322 : "=w"(result)
17323 : "w"(a), "w"(b)
17324 : /* No clobbers */);
17325 return result;
17328 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17329 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
17331 uint32x2_t result;
17332 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
17333 : "=w"(result)
17334 : "w"(a), "w"(b)
17335 : /* No clobbers */);
17336 return result;
17339 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17340 vuzp1q_f32 (float32x4_t a, float32x4_t b)
17342 float32x4_t result;
17343 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17344 : "=w"(result)
17345 : "w"(a), "w"(b)
17346 : /* No clobbers */);
17347 return result;
17350 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17351 vuzp1q_f64 (float64x2_t a, float64x2_t b)
17353 float64x2_t result;
17354 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17355 : "=w"(result)
17356 : "w"(a), "w"(b)
17357 : /* No clobbers */);
17358 return result;
17361 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17362 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
17364 poly8x16_t result;
17365 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17366 : "=w"(result)
17367 : "w"(a), "w"(b)
17368 : /* No clobbers */);
17369 return result;
17372 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17373 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
17375 poly16x8_t result;
17376 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17377 : "=w"(result)
17378 : "w"(a), "w"(b)
17379 : /* No clobbers */);
17380 return result;
17383 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17384 vuzp1q_s8 (int8x16_t a, int8x16_t b)
17386 int8x16_t result;
17387 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17388 : "=w"(result)
17389 : "w"(a), "w"(b)
17390 : /* No clobbers */);
17391 return result;
17394 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17395 vuzp1q_s16 (int16x8_t a, int16x8_t b)
17397 int16x8_t result;
17398 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17399 : "=w"(result)
17400 : "w"(a), "w"(b)
17401 : /* No clobbers */);
17402 return result;
17405 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17406 vuzp1q_s32 (int32x4_t a, int32x4_t b)
17408 int32x4_t result;
17409 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17410 : "=w"(result)
17411 : "w"(a), "w"(b)
17412 : /* No clobbers */);
17413 return result;
17416 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17417 vuzp1q_s64 (int64x2_t a, int64x2_t b)
17419 int64x2_t result;
17420 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17421 : "=w"(result)
17422 : "w"(a), "w"(b)
17423 : /* No clobbers */);
17424 return result;
17427 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17428 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
17430 uint8x16_t result;
17431 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
17432 : "=w"(result)
17433 : "w"(a), "w"(b)
17434 : /* No clobbers */);
17435 return result;
17438 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17439 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
17441 uint16x8_t result;
17442 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
17443 : "=w"(result)
17444 : "w"(a), "w"(b)
17445 : /* No clobbers */);
17446 return result;
17449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17450 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
17452 uint32x4_t result;
17453 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
17454 : "=w"(result)
17455 : "w"(a), "w"(b)
17456 : /* No clobbers */);
17457 return result;
17460 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17461 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
17463 uint64x2_t result;
17464 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
17465 : "=w"(result)
17466 : "w"(a), "w"(b)
17467 : /* No clobbers */);
17468 return result;
17471 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17472 vuzp2_f32 (float32x2_t a, float32x2_t b)
17474 float32x2_t result;
17475 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17476 : "=w"(result)
17477 : "w"(a), "w"(b)
17478 : /* No clobbers */);
17479 return result;
17482 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17483 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
17485 poly8x8_t result;
17486 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17487 : "=w"(result)
17488 : "w"(a), "w"(b)
17489 : /* No clobbers */);
17490 return result;
17493 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17494 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
17496 poly16x4_t result;
17497 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17498 : "=w"(result)
17499 : "w"(a), "w"(b)
17500 : /* No clobbers */);
17501 return result;
17504 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17505 vuzp2_s8 (int8x8_t a, int8x8_t b)
17507 int8x8_t result;
17508 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17509 : "=w"(result)
17510 : "w"(a), "w"(b)
17511 : /* No clobbers */);
17512 return result;
17515 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17516 vuzp2_s16 (int16x4_t a, int16x4_t b)
17518 int16x4_t result;
17519 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17520 : "=w"(result)
17521 : "w"(a), "w"(b)
17522 : /* No clobbers */);
17523 return result;
17526 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17527 vuzp2_s32 (int32x2_t a, int32x2_t b)
17529 int32x2_t result;
17530 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17531 : "=w"(result)
17532 : "w"(a), "w"(b)
17533 : /* No clobbers */);
17534 return result;
17537 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17538 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
17540 uint8x8_t result;
17541 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
17542 : "=w"(result)
17543 : "w"(a), "w"(b)
17544 : /* No clobbers */);
17545 return result;
17548 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17549 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
17551 uint16x4_t result;
17552 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
17553 : "=w"(result)
17554 : "w"(a), "w"(b)
17555 : /* No clobbers */);
17556 return result;
17559 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17560 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
17562 uint32x2_t result;
17563 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
17564 : "=w"(result)
17565 : "w"(a), "w"(b)
17566 : /* No clobbers */);
17567 return result;
17570 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17571 vuzp2q_f32 (float32x4_t a, float32x4_t b)
17573 float32x4_t result;
17574 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17575 : "=w"(result)
17576 : "w"(a), "w"(b)
17577 : /* No clobbers */);
17578 return result;
17581 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17582 vuzp2q_f64 (float64x2_t a, float64x2_t b)
17584 float64x2_t result;
17585 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17586 : "=w"(result)
17587 : "w"(a), "w"(b)
17588 : /* No clobbers */);
17589 return result;
17592 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17593 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
17595 poly8x16_t result;
17596 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17597 : "=w"(result)
17598 : "w"(a), "w"(b)
17599 : /* No clobbers */);
17600 return result;
17603 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17604 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
17606 poly16x8_t result;
17607 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17608 : "=w"(result)
17609 : "w"(a), "w"(b)
17610 : /* No clobbers */);
17611 return result;
17614 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17615 vuzp2q_s8 (int8x16_t a, int8x16_t b)
17617 int8x16_t result;
17618 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17619 : "=w"(result)
17620 : "w"(a), "w"(b)
17621 : /* No clobbers */);
17622 return result;
17625 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17626 vuzp2q_s16 (int16x8_t a, int16x8_t b)
17628 int16x8_t result;
17629 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17630 : "=w"(result)
17631 : "w"(a), "w"(b)
17632 : /* No clobbers */);
17633 return result;
17636 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17637 vuzp2q_s32 (int32x4_t a, int32x4_t b)
17639 int32x4_t result;
17640 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17641 : "=w"(result)
17642 : "w"(a), "w"(b)
17643 : /* No clobbers */);
17644 return result;
17647 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17648 vuzp2q_s64 (int64x2_t a, int64x2_t b)
17650 int64x2_t result;
17651 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17652 : "=w"(result)
17653 : "w"(a), "w"(b)
17654 : /* No clobbers */);
17655 return result;
17658 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17659 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
17661 uint8x16_t result;
17662 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
17663 : "=w"(result)
17664 : "w"(a), "w"(b)
17665 : /* No clobbers */);
17666 return result;
17669 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17670 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
17672 uint16x8_t result;
17673 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
17674 : "=w"(result)
17675 : "w"(a), "w"(b)
17676 : /* No clobbers */);
17677 return result;
17680 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17681 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
17683 uint32x4_t result;
17684 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
17685 : "=w"(result)
17686 : "w"(a), "w"(b)
17687 : /* No clobbers */);
17688 return result;
17691 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17692 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
17694 uint64x2_t result;
17695 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
17696 : "=w"(result)
17697 : "w"(a), "w"(b)
17698 : /* No clobbers */);
17699 return result;
17702 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17703 vzip1_f32 (float32x2_t a, float32x2_t b)
17705 float32x2_t result;
17706 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17707 : "=w"(result)
17708 : "w"(a), "w"(b)
17709 : /* No clobbers */);
17710 return result;
17713 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17714 vzip1_p8 (poly8x8_t a, poly8x8_t b)
17716 poly8x8_t result;
17717 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17718 : "=w"(result)
17719 : "w"(a), "w"(b)
17720 : /* No clobbers */);
17721 return result;
17724 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17725 vzip1_p16 (poly16x4_t a, poly16x4_t b)
17727 poly16x4_t result;
17728 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17729 : "=w"(result)
17730 : "w"(a), "w"(b)
17731 : /* No clobbers */);
17732 return result;
17735 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17736 vzip1_s8 (int8x8_t a, int8x8_t b)
17738 int8x8_t result;
17739 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17740 : "=w"(result)
17741 : "w"(a), "w"(b)
17742 : /* No clobbers */);
17743 return result;
17746 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17747 vzip1_s16 (int16x4_t a, int16x4_t b)
17749 int16x4_t result;
17750 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17751 : "=w"(result)
17752 : "w"(a), "w"(b)
17753 : /* No clobbers */);
17754 return result;
17757 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17758 vzip1_s32 (int32x2_t a, int32x2_t b)
17760 int32x2_t result;
17761 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17762 : "=w"(result)
17763 : "w"(a), "w"(b)
17764 : /* No clobbers */);
17765 return result;
17768 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17769 vzip1_u8 (uint8x8_t a, uint8x8_t b)
17771 uint8x8_t result;
17772 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
17773 : "=w"(result)
17774 : "w"(a), "w"(b)
17775 : /* No clobbers */);
17776 return result;
17779 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17780 vzip1_u16 (uint16x4_t a, uint16x4_t b)
17782 uint16x4_t result;
17783 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
17784 : "=w"(result)
17785 : "w"(a), "w"(b)
17786 : /* No clobbers */);
17787 return result;
17790 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17791 vzip1_u32 (uint32x2_t a, uint32x2_t b)
17793 uint32x2_t result;
17794 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
17795 : "=w"(result)
17796 : "w"(a), "w"(b)
17797 : /* No clobbers */);
17798 return result;
17801 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17802 vzip1q_f32 (float32x4_t a, float32x4_t b)
17804 float32x4_t result;
17805 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
17806 : "=w"(result)
17807 : "w"(a), "w"(b)
17808 : /* No clobbers */);
17809 return result;
17812 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17813 vzip1q_f64 (float64x2_t a, float64x2_t b)
17815 float64x2_t result;
17816 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
17817 : "=w"(result)
17818 : "w"(a), "w"(b)
17819 : /* No clobbers */);
17820 return result;
17823 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
17824 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
17826 poly8x16_t result;
17827 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
17828 : "=w"(result)
17829 : "w"(a), "w"(b)
17830 : /* No clobbers */);
17831 return result;
17834 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
17835 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
17837 poly16x8_t result;
17838 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
17839 : "=w"(result)
17840 : "w"(a), "w"(b)
17841 : /* No clobbers */);
17842 return result;
17845 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17846 vzip1q_s8 (int8x16_t a, int8x16_t b)
17848 int8x16_t result;
17849 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
17850 : "=w"(result)
17851 : "w"(a), "w"(b)
17852 : /* No clobbers */);
17853 return result;
17856 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17857 vzip1q_s16 (int16x8_t a, int16x8_t b)
17859 int16x8_t result;
17860 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
17861 : "=w"(result)
17862 : "w"(a), "w"(b)
17863 : /* No clobbers */);
17864 return result;
17867 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17868 vzip1q_s32 (int32x4_t a, int32x4_t b)
17870 int32x4_t result;
17871 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
17872 : "=w"(result)
17873 : "w"(a), "w"(b)
17874 : /* No clobbers */);
17875 return result;
17878 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17879 vzip1q_s64 (int64x2_t a, int64x2_t b)
17881 int64x2_t result;
17882 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
17883 : "=w"(result)
17884 : "w"(a), "w"(b)
17885 : /* No clobbers */);
17886 return result;
17889 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17890 vzip1q_u8 (uint8x16_t a, uint8x16_t b)
17892 uint8x16_t result;
17893 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
17894 : "=w"(result)
17895 : "w"(a), "w"(b)
17896 : /* No clobbers */);
17897 return result;
17900 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17901 vzip1q_u16 (uint16x8_t a, uint16x8_t b)
17903 uint16x8_t result;
17904 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
17905 : "=w"(result)
17906 : "w"(a), "w"(b)
17907 : /* No clobbers */);
17908 return result;
17911 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17912 vzip1q_u32 (uint32x4_t a, uint32x4_t b)
17914 uint32x4_t result;
17915 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
17916 : "=w"(result)
17917 : "w"(a), "w"(b)
17918 : /* No clobbers */);
17919 return result;
17922 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17923 vzip1q_u64 (uint64x2_t a, uint64x2_t b)
17925 uint64x2_t result;
17926 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
17927 : "=w"(result)
17928 : "w"(a), "w"(b)
17929 : /* No clobbers */);
17930 return result;
17933 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17934 vzip2_f32 (float32x2_t a, float32x2_t b)
17936 float32x2_t result;
17937 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
17938 : "=w"(result)
17939 : "w"(a), "w"(b)
17940 : /* No clobbers */);
17941 return result;
17944 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
17945 vzip2_p8 (poly8x8_t a, poly8x8_t b)
17947 poly8x8_t result;
17948 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
17949 : "=w"(result)
17950 : "w"(a), "w"(b)
17951 : /* No clobbers */);
17952 return result;
17955 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
17956 vzip2_p16 (poly16x4_t a, poly16x4_t b)
17958 poly16x4_t result;
17959 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
17960 : "=w"(result)
17961 : "w"(a), "w"(b)
17962 : /* No clobbers */);
17963 return result;
17966 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17967 vzip2_s8 (int8x8_t a, int8x8_t b)
17969 int8x8_t result;
17970 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
17971 : "=w"(result)
17972 : "w"(a), "w"(b)
17973 : /* No clobbers */);
17974 return result;
17977 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17978 vzip2_s16 (int16x4_t a, int16x4_t b)
17980 int16x4_t result;
17981 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
17982 : "=w"(result)
17983 : "w"(a), "w"(b)
17984 : /* No clobbers */);
17985 return result;
17988 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17989 vzip2_s32 (int32x2_t a, int32x2_t b)
17991 int32x2_t result;
17992 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
17993 : "=w"(result)
17994 : "w"(a), "w"(b)
17995 : /* No clobbers */);
17996 return result;
17999 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18000 vzip2_u8 (uint8x8_t a, uint8x8_t b)
18002 uint8x8_t result;
18003 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
18004 : "=w"(result)
18005 : "w"(a), "w"(b)
18006 : /* No clobbers */);
18007 return result;
18010 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18011 vzip2_u16 (uint16x4_t a, uint16x4_t b)
18013 uint16x4_t result;
18014 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
18015 : "=w"(result)
18016 : "w"(a), "w"(b)
18017 : /* No clobbers */);
18018 return result;
18021 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18022 vzip2_u32 (uint32x2_t a, uint32x2_t b)
18024 uint32x2_t result;
18025 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
18026 : "=w"(result)
18027 : "w"(a), "w"(b)
18028 : /* No clobbers */);
18029 return result;
18032 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18033 vzip2q_f32 (float32x4_t a, float32x4_t b)
18035 float32x4_t result;
18036 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
18037 : "=w"(result)
18038 : "w"(a), "w"(b)
18039 : /* No clobbers */);
18040 return result;
18043 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18044 vzip2q_f64 (float64x2_t a, float64x2_t b)
18046 float64x2_t result;
18047 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
18048 : "=w"(result)
18049 : "w"(a), "w"(b)
18050 : /* No clobbers */);
18051 return result;
18054 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18055 vzip2q_p8 (poly8x16_t a, poly8x16_t b)
18057 poly8x16_t result;
18058 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
18059 : "=w"(result)
18060 : "w"(a), "w"(b)
18061 : /* No clobbers */);
18062 return result;
18065 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18066 vzip2q_p16 (poly16x8_t a, poly16x8_t b)
18068 poly16x8_t result;
18069 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
18070 : "=w"(result)
18071 : "w"(a), "w"(b)
18072 : /* No clobbers */);
18073 return result;
18076 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18077 vzip2q_s8 (int8x16_t a, int8x16_t b)
18079 int8x16_t result;
18080 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
18081 : "=w"(result)
18082 : "w"(a), "w"(b)
18083 : /* No clobbers */);
18084 return result;
18087 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18088 vzip2q_s16 (int16x8_t a, int16x8_t b)
18090 int16x8_t result;
18091 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
18092 : "=w"(result)
18093 : "w"(a), "w"(b)
18094 : /* No clobbers */);
18095 return result;
18098 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18099 vzip2q_s32 (int32x4_t a, int32x4_t b)
18101 int32x4_t result;
18102 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
18103 : "=w"(result)
18104 : "w"(a), "w"(b)
18105 : /* No clobbers */);
18106 return result;
18109 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18110 vzip2q_s64 (int64x2_t a, int64x2_t b)
18112 int64x2_t result;
18113 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
18114 : "=w"(result)
18115 : "w"(a), "w"(b)
18116 : /* No clobbers */);
18117 return result;
18120 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18121 vzip2q_u8 (uint8x16_t a, uint8x16_t b)
18123 uint8x16_t result;
18124 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
18125 : "=w"(result)
18126 : "w"(a), "w"(b)
18127 : /* No clobbers */);
18128 return result;
18131 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18132 vzip2q_u16 (uint16x8_t a, uint16x8_t b)
18134 uint16x8_t result;
18135 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
18136 : "=w"(result)
18137 : "w"(a), "w"(b)
18138 : /* No clobbers */);
18139 return result;
18142 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18143 vzip2q_u32 (uint32x4_t a, uint32x4_t b)
18145 uint32x4_t result;
18146 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
18147 : "=w"(result)
18148 : "w"(a), "w"(b)
18149 : /* No clobbers */);
18150 return result;
18153 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18154 vzip2q_u64 (uint64x2_t a, uint64x2_t b)
18156 uint64x2_t result;
18157 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
18158 : "=w"(result)
18159 : "w"(a), "w"(b)
18160 : /* No clobbers */);
18161 return result;
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   These are required for casting the memory reference.  */
/* Expand to a typedef of an NELEM-element array-of-scalar struct,
   e.g. __STRUCTN (int, 8, 2) defines int8x2_t.  */
#define __STRUCTN(t, sz, nelem)			\
  typedef struct t ## sz ## x ## nelem ## _t {	\
    t ## sz ## _t val[nelem];			\
  }  t ## sz ## x ## nelem ## _t;
18218 /* 2-element structs. */
18219 __STRUCTN (int, 8, 2)
18220 __STRUCTN (int, 16, 2)
18221 __STRUCTN (uint, 8, 2)
18222 __STRUCTN (uint, 16, 2)
18223 __STRUCTN (poly, 8, 2)
18224 __STRUCTN (poly, 16, 2)
18225 /* 3-element structs. */
18226 __STRUCTN (int, 8, 3)
18227 __STRUCTN (int, 16, 3)
18228 __STRUCTN (int, 32, 3)
18229 __STRUCTN (int, 64, 3)
18230 __STRUCTN (uint, 8, 3)
18231 __STRUCTN (uint, 16, 3)
18232 __STRUCTN (uint, 32, 3)
18233 __STRUCTN (uint, 64, 3)
18234 __STRUCTN (float, 32, 3)
18235 __STRUCTN (float, 64, 3)
18236 __STRUCTN (poly, 8, 3)
18237 __STRUCTN (poly, 16, 3)
18238 /* 4-element structs. */
18239 __STRUCTN (int, 8, 4)
18240 __STRUCTN (int, 64, 4)
18241 __STRUCTN (uint, 8, 4)
18242 __STRUCTN (uint, 64, 4)
18243 __STRUCTN (poly, 8, 4)
18244 __STRUCTN (float, 64, 4)
18245 #undef __STRUCTN
/* Expand to a vld2<Q>_dup_<suffix> definition: LD2R replicates one
   2-element chunk from memory into v16/v17, then the pair is stored
   back into the result struct.  v16-v17 are hard-coded scratch
   registers, hence the clobbers.  */
#define __LD2R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
18262 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
18263 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
18264 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
18265 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
18266 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
18267 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
18268 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
18269 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
18270 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
18271 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
18272 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
18273 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
18274 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
18275 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
18276 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
18277 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
18278 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
18279 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
18280 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
18281 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
18282 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
18283 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
18284 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
18285 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
/* Expand to a vld2<Q>_lane_<suffix> definition: load the incoming
   pair B into v16/v17, overwrite lane C of each from memory with LD2,
   and store the updated pair back into the result struct.  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17");					\
    return result;							\
  }
18304 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
18305 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
18306 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
18307 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
18308 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
18309 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
18310 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
18311 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
18312 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
18313 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
18314 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
18315 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
18316 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
18317 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
18318 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
18319 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
18320 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
18321 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
18322 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
18323 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
18324 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
18325 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
18326 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
18327 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* Expand to a vld3<Q>_dup_<suffix> definition: LD3R replicates one
   3-element chunk from memory into v16-v18, then the triple is stored
   back into the result struct.  */
#define __LD3R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }
18344 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
18345 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
18346 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
18347 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
18348 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
18349 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
18350 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
18351 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
18352 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
18353 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
18354 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
18355 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
18356 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
18357 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
18358 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
18359 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
18360 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
18361 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
18362 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
18363 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
18364 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
18365 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
18366 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
18367 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
/* Expand to a vld3<Q>_lane_<suffix> definition: load the incoming
   triple B into v16-v18, overwrite lane C of each from memory with
   LD3, and store the updated triple back into the result struct.  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t"	\
	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17", "v18");				\
    return result;							\
  }
18386 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
18387 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
18388 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
18389 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
18390 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
18391 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
18392 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
18393 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
18394 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
18395 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
18396 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
18397 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
18398 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
18399 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
18400 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
18401 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
18402 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
18403 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
18404 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
18405 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
18406 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
18407 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
18408 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
18409 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Expand to a vld4<Q>_dup_<suffix> definition: LD4R replicates one
   4-element chunk from memory into v16-v19, then the quad is stored
   back into the result struct.  */
#define __LD4R_FUNC(rettype, structtype, ptrtype,			\
		    regsuffix, funcsuffix, Q)				\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr)			\
  {									\
    rettype result;							\
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(*(const structtype *)ptr)				\
	     : "memory", "v16", "v17", "v18", "v19");			\
    return result;							\
  }
18426 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
18427 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
18428 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
18429 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
18430 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
18431 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
18432 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
18433 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
18434 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
18435 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
18436 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
18437 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
18438 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
18439 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
18440 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
18441 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
18442 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
18443 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
18444 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
18445 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
18446 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
18447 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
18448 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
18449 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* Expand to a vld4<Q>_lane_<suffix> definition: load the incoming
   quad B into v16-v19, overwrite lane C of each from memory with LD4,
   and store the updated quad back into the result struct.  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     rettype b, const int c)		\
  {									\
    rettype result;							\
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t"	\
	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t"	\
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t"	\
	     : "=Q"(result)						\
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)		\
	     : "memory", "v16", "v17", "v18", "v19");			\
    return result;							\
  }
18468 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
18469 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
18470 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
18471 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
18472 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
18473 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
18474 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
18475 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
18476 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
18477 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
18478 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
18479 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
18480 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
18481 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
18482 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
18483 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
18484 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
18485 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
18486 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
18487 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
18488 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
18489 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
18490 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
18491 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* Expand to a vst2<Q>_lane_<suffix> definition: load the pair B into
   v16/v17, then store lane C of each to *PTR with ST2.  Note PTR is
   declared const but written through (existing interface kept).  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix,			\
			lnsuffix, funcsuffix, Q)			\
  __extension__ static __inline void					\
  __attribute__ ((__always_inline__))					\
  vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,		\
				     intype b, const int c)		\
  {									\
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"	\
	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t"	\
	     : "=Q"(*(intype *) ptr)					\
	     : "Q"(b), "i"(c)						\
	     : "memory", "v16", "v17");					\
  }
18507 __ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
18508 __ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
18509 __ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
18510 __ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
18511 __ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
18512 __ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
18513 __ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
18514 __ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
18515 __ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
18516 __ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
18517 __ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
18518 __ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
18519 __ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
18520 __ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
18521 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
18522 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
18523 __ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
18524 __ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
18525 __ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
18526 __ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
18527 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
18528 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
18529 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
18530 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
18532 #define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
18533 lnsuffix, funcsuffix, Q) \
18534 __extension__ static __inline void \
18535 __attribute__ ((__always_inline__)) \
18536 vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
18537 intype b, const int c) \
18539 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
18540 "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
18541 : "=Q"(*(intype *) ptr) \
18542 : "Q"(b), "i"(c) \
18543 : "memory", "v16", "v17", "v18"); \
18546 __ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
18547 __ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
18548 __ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
18549 __ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
18550 __ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
18551 __ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
18552 __ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
18553 __ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
18554 __ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
18555 __ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
18556 __ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
18557 __ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
18558 __ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
18559 __ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
18560 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
18561 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
18562 __ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
18563 __ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
18564 __ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
18565 __ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
18566 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
18567 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
18568 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
18569 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
18571 #define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
18572 lnsuffix, funcsuffix, Q) \
18573 __extension__ static __inline void \
18574 __attribute__ ((__always_inline__)) \
18575 vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
18576 intype b, const int c) \
18578 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
18579 "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
18580 : "=Q"(*(intype *) ptr) \
18581 : "Q"(b), "i"(c) \
18582 : "memory", "v16", "v17", "v18", "v19"); \
18585 __ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
18586 __ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
18587 __ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
18588 __ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
18589 __ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
18590 __ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
18591 __ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
18592 __ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
18593 __ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
18594 __ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
18595 __ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
18596 __ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
18597 __ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
18598 __ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
18599 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
18600 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
18601 __ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
18602 __ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
18603 __ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
18604 __ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
18605 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
18606 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
18607 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
18608 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
18610 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
18611 vaddlv_s32 (int32x2_t a)
18613 int64_t result;
18614 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
18615 return result;
18618 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18619 vaddlv_u32 (uint32x2_t a)
18621 uint64_t result;
18622 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
18623 return result;
18626 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18627 vaddv_s32 (int32x2_t a)
18629 int32_t result;
18630 __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18631 return result;
18634 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18635 vaddv_u32 (uint32x2_t a)
18637 uint32_t result;
18638 __asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18639 return result;
18642 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18643 vmaxnmv_f32 (float32x2_t a)
18645 float32_t result;
18646 __asm__ ("fmaxnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18647 return result;
18650 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18651 vminnmv_f32 (float32x2_t a)
18653 float32_t result;
18654 __asm__ ("fminnmp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18655 return result;
18658 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18659 vmaxnmvq_f64 (float64x2_t a)
18661 float64_t result;
18662 __asm__ ("fmaxnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
18663 return result;
18666 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18667 vmaxv_s32 (int32x2_t a)
18669 int32_t result;
18670 __asm__ ("smaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18671 return result;
18674 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18675 vmaxv_u32 (uint32x2_t a)
18677 uint32_t result;
18678 __asm__ ("umaxp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18679 return result;
18682 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18683 vminnmvq_f64 (float64x2_t a)
18685 float64_t result;
18686 __asm__ ("fminnmp %0.2d, %1.2d, %1.2d" : "=w"(result) : "w"(a) : );
18687 return result;
18690 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18691 vminv_s32 (int32x2_t a)
18693 int32_t result;
18694 __asm__ ("sminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18695 return result;
18698 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18699 vminv_u32 (uint32x2_t a)
18701 uint32_t result;
18702 __asm__ ("uminp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
18703 return result;
18706 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18707 vpaddd_s64 (int64x2_t __a)
18709 return __builtin_aarch64_addpdi (__a);
18712 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18713 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
18715 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
18718 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18719 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
18721 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
18724 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18725 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
18727 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
18730 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18731 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
18733 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
18736 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18737 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
18739 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
18742 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18743 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
18745 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
18748 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18749 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
18751 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
18754 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18755 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
18757 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
18760 /* Table intrinsics. */
18762 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18763 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
18765 poly8x8_t result;
18766 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
18767 : "=w"(result)
18768 : "w"(a), "w"(b)
18769 : /* No clobbers */);
18770 return result;
18773 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18774 vqtbl1_s8 (int8x16_t a, int8x8_t b)
18776 int8x8_t result;
18777 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
18778 : "=w"(result)
18779 : "w"(a), "w"(b)
18780 : /* No clobbers */);
18781 return result;
18784 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18785 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
18787 uint8x8_t result;
18788 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
18789 : "=w"(result)
18790 : "w"(a), "w"(b)
18791 : /* No clobbers */);
18792 return result;
18795 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18796 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
18798 poly8x16_t result;
18799 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
18800 : "=w"(result)
18801 : "w"(a), "w"(b)
18802 : /* No clobbers */);
18803 return result;
18806 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18807 vqtbl1q_s8 (int8x16_t a, int8x16_t b)
18809 int8x16_t result;
18810 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
18811 : "=w"(result)
18812 : "w"(a), "w"(b)
18813 : /* No clobbers */);
18814 return result;
18817 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18818 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
18820 uint8x16_t result;
18821 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
18822 : "=w"(result)
18823 : "w"(a), "w"(b)
18824 : /* No clobbers */);
18825 return result;
18828 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18829 vqtbl2_s8 (int8x16x2_t tab, int8x8_t idx)
18831 int8x8_t result;
18832 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18833 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
18834 :"=w"(result)
18835 :"Q"(tab),"w"(idx)
18836 :"memory", "v16", "v17");
18837 return result;
18840 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18841 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
18843 uint8x8_t result;
18844 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18845 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
18846 :"=w"(result)
18847 :"Q"(tab),"w"(idx)
18848 :"memory", "v16", "v17");
18849 return result;
18852 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18853 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
18855 poly8x8_t result;
18856 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18857 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
18858 :"=w"(result)
18859 :"Q"(tab),"w"(idx)
18860 :"memory", "v16", "v17");
18861 return result;
18864 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18865 vqtbl2q_s8 (int8x16x2_t tab, int8x16_t idx)
18867 int8x16_t result;
18868 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18869 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
18870 :"=w"(result)
18871 :"Q"(tab),"w"(idx)
18872 :"memory", "v16", "v17");
18873 return result;
18876 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18877 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
18879 uint8x16_t result;
18880 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18881 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
18882 :"=w"(result)
18883 :"Q"(tab),"w"(idx)
18884 :"memory", "v16", "v17");
18885 return result;
18888 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18889 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
18891 poly8x16_t result;
18892 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
18893 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
18894 :"=w"(result)
18895 :"Q"(tab),"w"(idx)
18896 :"memory", "v16", "v17");
18897 return result;
18900 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18901 vqtbl3_s8 (int8x16x3_t tab, int8x8_t idx)
18903 int8x8_t result;
18904 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18905 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
18906 :"=w"(result)
18907 :"Q"(tab),"w"(idx)
18908 :"memory", "v16", "v17", "v18");
18909 return result;
18912 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18913 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
18915 uint8x8_t result;
18916 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18917 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
18918 :"=w"(result)
18919 :"Q"(tab),"w"(idx)
18920 :"memory", "v16", "v17", "v18");
18921 return result;
18924 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18925 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
18927 poly8x8_t result;
18928 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18929 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
18930 :"=w"(result)
18931 :"Q"(tab),"w"(idx)
18932 :"memory", "v16", "v17", "v18");
18933 return result;
18936 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18937 vqtbl3q_s8 (int8x16x3_t tab, int8x16_t idx)
18939 int8x16_t result;
18940 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18941 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
18942 :"=w"(result)
18943 :"Q"(tab),"w"(idx)
18944 :"memory", "v16", "v17", "v18");
18945 return result;
18948 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18949 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
18951 uint8x16_t result;
18952 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18953 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
18954 :"=w"(result)
18955 :"Q"(tab),"w"(idx)
18956 :"memory", "v16", "v17", "v18");
18957 return result;
18960 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18961 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
18963 poly8x16_t result;
18964 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
18965 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
18966 :"=w"(result)
18967 :"Q"(tab),"w"(idx)
18968 :"memory", "v16", "v17", "v18");
18969 return result;
18972 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18973 vqtbl4_s8 (int8x16x4_t tab, int8x8_t idx)
18975 int8x8_t result;
18976 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
18977 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
18978 :"=w"(result)
18979 :"Q"(tab),"w"(idx)
18980 :"memory", "v16", "v17", "v18", "v19");
18981 return result;
18984 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18985 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
18987 uint8x8_t result;
18988 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
18989 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
18990 :"=w"(result)
18991 :"Q"(tab),"w"(idx)
18992 :"memory", "v16", "v17", "v18", "v19");
18993 return result;
18996 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18997 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
18999 poly8x8_t result;
19000 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19001 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19002 :"=w"(result)
19003 :"Q"(tab),"w"(idx)
19004 :"memory", "v16", "v17", "v18", "v19");
19005 return result;
19009 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19010 vqtbl4q_s8 (int8x16x4_t tab, int8x16_t idx)
19012 int8x16_t result;
19013 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19014 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19015 :"=w"(result)
19016 :"Q"(tab),"w"(idx)
19017 :"memory", "v16", "v17", "v18", "v19");
19018 return result;
19021 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19022 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
19024 uint8x16_t result;
19025 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19026 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19027 :"=w"(result)
19028 :"Q"(tab),"w"(idx)
19029 :"memory", "v16", "v17", "v18", "v19");
19030 return result;
19033 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19034 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
19036 poly8x16_t result;
19037 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19038 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19039 :"=w"(result)
19040 :"Q"(tab),"w"(idx)
19041 :"memory", "v16", "v17", "v18", "v19");
19042 return result;
19046 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19047 vqtbx1_s8 (int8x8_t r, int8x16_t tab, int8x8_t idx)
19049 int8x8_t result = r;
19050 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
19051 : "+w"(result)
19052 : "w"(tab), "w"(idx)
19053 : /* No clobbers */);
19054 return result;
19057 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19058 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
19060 uint8x8_t result = r;
19061 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
19062 : "+w"(result)
19063 : "w"(tab), "w"(idx)
19064 : /* No clobbers */);
19065 return result;
19068 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19069 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
19071 poly8x8_t result = r;
19072 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
19073 : "+w"(result)
19074 : "w"(tab), "w"(idx)
19075 : /* No clobbers */);
19076 return result;
19079 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19080 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, int8x16_t idx)
19082 int8x16_t result = r;
19083 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
19084 : "+w"(result)
19085 : "w"(tab), "w"(idx)
19086 : /* No clobbers */);
19087 return result;
19090 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19091 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
19093 uint8x16_t result = r;
19094 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
19095 : "+w"(result)
19096 : "w"(tab), "w"(idx)
19097 : /* No clobbers */);
19098 return result;
19101 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19102 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
19104 poly8x16_t result = r;
19105 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
19106 : "+w"(result)
19107 : "w"(tab), "w"(idx)
19108 : /* No clobbers */);
19109 return result;
19112 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19113 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, int8x8_t idx)
19115 int8x8_t result = r;
19116 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19117 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
19118 :"+w"(result)
19119 :"Q"(tab),"w"(idx)
19120 :"memory", "v16", "v17");
19121 return result;
19124 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19125 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
19127 uint8x8_t result = r;
19128 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19129 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
19130 :"+w"(result)
19131 :"Q"(tab),"w"(idx)
19132 :"memory", "v16", "v17");
19133 return result;
19136 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19137 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
19139 poly8x8_t result = r;
19140 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19141 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
19142 :"+w"(result)
19143 :"Q"(tab),"w"(idx)
19144 :"memory", "v16", "v17");
19145 return result;
19149 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19150 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, int8x16_t idx)
19152 int8x16_t result = r;
19153 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19154 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
19155 :"+w"(result)
19156 :"Q"(tab),"w"(idx)
19157 :"memory", "v16", "v17");
19158 return result;
19161 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19162 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
19164 uint8x16_t result = r;
19165 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19166 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
19167 :"+w"(result)
19168 :"Q"(tab),"w"(idx)
19169 :"memory", "v16", "v17");
19170 return result;
19173 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19174 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
19176 poly8x16_t result = r;
19177 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
19178 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
19179 :"+w"(result)
19180 :"Q"(tab),"w"(idx)
19181 :"memory", "v16", "v17");
19182 return result;
19186 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19187 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, int8x8_t idx)
19189 int8x8_t result = r;
19190 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19191 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
19192 :"+w"(result)
19193 :"Q"(tab),"w"(idx)
19194 :"memory", "v16", "v17", "v18");
19195 return result;
19198 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19199 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
19201 uint8x8_t result = r;
19202 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19203 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
19204 :"+w"(result)
19205 :"Q"(tab),"w"(idx)
19206 :"memory", "v16", "v17", "v18");
19207 return result;
19210 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19211 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
19213 poly8x8_t result = r;
19214 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19215 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
19216 :"+w"(result)
19217 :"Q"(tab),"w"(idx)
19218 :"memory", "v16", "v17", "v18");
19219 return result;
19223 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19224 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, int8x16_t idx)
19226 int8x16_t result = r;
19227 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19228 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
19229 :"+w"(result)
19230 :"Q"(tab),"w"(idx)
19231 :"memory", "v16", "v17", "v18");
19232 return result;
19235 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19236 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
19238 uint8x16_t result = r;
19239 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19240 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
19241 :"+w"(result)
19242 :"Q"(tab),"w"(idx)
19243 :"memory", "v16", "v17", "v18");
19244 return result;
19247 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19248 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
19250 poly8x16_t result = r;
19251 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
19252 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
19253 :"+w"(result)
19254 :"Q"(tab),"w"(idx)
19255 :"memory", "v16", "v17", "v18");
19256 return result;
19260 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19261 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, int8x8_t idx)
19263 int8x8_t result = r;
19264 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19265 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19266 :"+w"(result)
19267 :"Q"(tab),"w"(idx)
19268 :"memory", "v16", "v17", "v18", "v19");
19269 return result;
19272 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19273 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
19275 uint8x8_t result = r;
19276 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19277 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19278 :"+w"(result)
19279 :"Q"(tab),"w"(idx)
19280 :"memory", "v16", "v17", "v18", "v19");
19281 return result;
19284 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19285 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
19287 poly8x8_t result = r;
19288 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19289 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
19290 :"+w"(result)
19291 :"Q"(tab),"w"(idx)
19292 :"memory", "v16", "v17", "v18", "v19");
19293 return result;
19297 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19298 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, int8x16_t idx)
19300 int8x16_t result = r;
19301 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19302 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19303 :"+w"(result)
19304 :"Q"(tab),"w"(idx)
19305 :"memory", "v16", "v17", "v18", "v19");
19306 return result;
19309 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19310 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
19312 uint8x16_t result = r;
19313 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19314 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19315 :"+w"(result)
19316 :"Q"(tab),"w"(idx)
19317 :"memory", "v16", "v17", "v18", "v19");
19318 return result;
19321 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19322 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
19324 poly8x16_t result = r;
19325 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
19326 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
19327 :"+w"(result)
19328 :"Q"(tab),"w"(idx)
19329 :"memory", "v16", "v17", "v18", "v19");
19330 return result;
19333 /* V7 legacy table intrinsics. */
19335 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19336 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
19338 int8x8_t result;
19339 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
19340 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19341 : "=w"(result)
19342 : "w"(temp), "w"(idx)
19343 : /* No clobbers */);
19344 return result;
19347 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19348 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
19350 uint8x8_t result;
19351 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
19352 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19353 : "=w"(result)
19354 : "w"(temp), "w"(idx)
19355 : /* No clobbers */);
19356 return result;
19359 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19360 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
19362 poly8x8_t result;
19363 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
19364 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19365 : "=w"(result)
19366 : "w"(temp), "w"(idx)
19367 : /* No clobbers */);
19368 return result;
19371 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19372 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
19374 int8x8_t result;
19375 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
19376 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19377 : "=w"(result)
19378 : "w"(temp), "w"(idx)
19379 : /* No clobbers */);
19380 return result;
19383 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19384 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
19386 uint8x8_t result;
19387 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
19388 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19389 : "=w"(result)
19390 : "w"(temp), "w"(idx)
19391 : /* No clobbers */);
19392 return result;
19395 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19396 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
19398 poly8x8_t result;
19399 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
19400 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
19401 : "=w"(result)
19402 : "w"(temp), "w"(idx)
19403 : /* No clobbers */);
19404 return result;
19407 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19408 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
19410 int8x8_t result;
19411 int8x16x2_t temp;
19412 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
19413 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
19414 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19415 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19416 : "=w"(result)
19417 : "Q"(temp), "w"(idx)
19418 : "v16", "v17", "memory");
19419 return result;
19422 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19423 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
19425 uint8x8_t result;
19426 uint8x16x2_t temp;
19427 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
19428 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
19429 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19430 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19431 : "=w"(result)
19432 : "Q"(temp), "w"(idx)
19433 : "v16", "v17", "memory");
19434 return result;
19437 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19438 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
19440 poly8x8_t result;
19441 poly8x16x2_t temp;
19442 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
19443 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
19444 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19445 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19446 : "=w"(result)
19447 : "Q"(temp), "w"(idx)
19448 : "v16", "v17", "memory");
19449 return result;
19452 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19453 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
19455 int8x8_t result;
19456 int8x16x2_t temp;
19457 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
19458 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
19459 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19460 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19461 : "=w"(result)
19462 : "Q"(temp), "w"(idx)
19463 : "v16", "v17", "memory");
19464 return result;
19467 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19468 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
19470 uint8x8_t result;
19471 uint8x16x2_t temp;
19472 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
19473 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
19474 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19475 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19476 : "=w"(result)
19477 : "Q"(temp), "w"(idx)
19478 : "v16", "v17", "memory");
19479 return result;
19482 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19483 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
19485 poly8x8_t result;
19486 poly8x16x2_t temp;
19487 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
19488 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
19489 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19490 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19491 : "=w"(result)
19492 : "Q"(temp), "w"(idx)
19493 : "v16", "v17", "memory");
19494 return result;
19497 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19498 vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
19500 int8x8_t result;
19501 int8x8_t tmp1;
19502 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
19503 __asm__ ("movi %0.8b, 8\n\t"
19504 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19505 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19506 "bsl %0.8b, %4.8b, %1.8b\n\t"
19507 : "+w"(result), "=w"(tmp1)
19508 : "w"(temp), "w"(idx), "w"(r)
19509 : /* No clobbers */);
19510 return result;
19513 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19514 vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
19516 uint8x8_t result;
19517 uint8x8_t tmp1;
19518 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
19519 __asm__ ("movi %0.8b, 8\n\t"
19520 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19521 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19522 "bsl %0.8b, %4.8b, %1.8b\n\t"
19523 : "+w"(result), "=w"(tmp1)
19524 : "w"(temp), "w"(idx), "w"(r)
19525 : /* No clobbers */);
19526 return result;
19529 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19530 vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
19532 poly8x8_t result;
19533 poly8x8_t tmp1;
19534 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
19535 __asm__ ("movi %0.8b, 8\n\t"
19536 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19537 "tbl %1.8b, {%2.16b}, %3.8b\n\t"
19538 "bsl %0.8b, %4.8b, %1.8b\n\t"
19539 : "+w"(result), "=w"(tmp1)
19540 : "w"(temp), "w"(idx), "w"(r)
19541 : /* No clobbers */);
19542 return result;
19545 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19546 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
19548 int8x8_t result = r;
19549 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
19550 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
19551 : "+w"(result)
19552 : "w"(temp), "w"(idx)
19553 : /* No clobbers */);
19554 return result;
19557 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19558 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
19560 uint8x8_t result = r;
19561 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
19562 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
19563 : "+w"(result)
19564 : "w"(temp), "w"(idx)
19565 : /* No clobbers */);
19566 return result;
19569 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19570 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
19572 poly8x8_t result = r;
19573 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
19574 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
19575 : "+w"(result)
19576 : "w"(temp), "w"(idx)
19577 : /* No clobbers */);
19578 return result;
19581 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19582 vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
19584 int8x8_t result;
19585 int8x8_t tmp1;
19586 int8x16x2_t temp;
19587 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
19588 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
19589 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19590 "movi %0.8b, 24\n\t"
19591 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19592 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19593 "bsl %0.8b, %4.8b, %1.8b\n\t"
19594 : "+w"(result), "=w"(tmp1)
19595 : "Q"(temp), "w"(idx), "w"(r)
19596 : "v16", "v17", "memory");
19597 return result;
19600 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19601 vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
19603 uint8x8_t result;
19604 uint8x8_t tmp1;
19605 uint8x16x2_t temp;
19606 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
19607 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
19608 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19609 "movi %0.8b, 24\n\t"
19610 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19611 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19612 "bsl %0.8b, %4.8b, %1.8b\n\t"
19613 : "+w"(result), "=w"(tmp1)
19614 : "Q"(temp), "w"(idx), "w"(r)
19615 : "v16", "v17", "memory");
19616 return result;
19619 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19620 vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
19622 poly8x8_t result;
19623 poly8x8_t tmp1;
19624 poly8x16x2_t temp;
19625 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
19626 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
19627 __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
19628 "movi %0.8b, 24\n\t"
19629 "cmhs %0.8b, %3.8b, %0.8b\n\t"
19630 "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
19631 "bsl %0.8b, %4.8b, %1.8b\n\t"
19632 : "+w"(result), "=w"(tmp1)
19633 : "Q"(temp), "w"(idx), "w"(r)
19634 : "v16", "v17", "memory");
19635 return result;
19638 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19639 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
19641 int8x8_t result = r;
19642 int8x16x2_t temp;
19643 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
19644 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
19645 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19646 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19647 : "+w"(result)
19648 : "Q"(temp), "w"(idx)
19649 : "v16", "v17", "memory");
19650 return result;
19653 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19654 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
19656 uint8x8_t result = r;
19657 uint8x16x2_t temp;
19658 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
19659 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
19660 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19661 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19662 : "+w"(result)
19663 : "Q"(temp), "w"(idx)
19664 : "v16", "v17", "memory");
19665 return result;
19668 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19669 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
19671 poly8x8_t result = r;
19672 poly8x16x2_t temp;
19673 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
19674 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
19675 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
19676 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
19677 : "+w"(result)
19678 : "Q"(temp), "w"(idx)
19679 : "v16", "v17", "memory");
19680 return result;
/* End of temporary inline asm.  */

/* Start of optimal implementations in approved order.  */
19687 /* vabs */
19689 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19690 vabs_f32 (float32x2_t __a)
19692 return __builtin_aarch64_absv2sf (__a);
19695 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19696 vabsq_f32 (float32x4_t __a)
19698 return __builtin_aarch64_absv4sf (__a);
19701 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19702 vabsq_f64 (float64x2_t __a)
19704 return __builtin_aarch64_absv2df (__a);
19707 /* vadd */
19709 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19710 vaddd_s64 (int64x1_t __a, int64x1_t __b)
19712 return __a + __b;
19715 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19716 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
19718 return __a + __b;
19721 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19722 vaddv_f32 (float32x2_t __a)
19724 float32x2_t t = __builtin_aarch64_addvv2sf (__a);
19725 return vget_lane_f32 (t, 0);
19728 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19729 vaddvq_f32 (float32x4_t __a)
19731 float32x4_t t = __builtin_aarch64_addvv4sf (__a);
19732 return vgetq_lane_f32 (t, 0);
19735 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19736 vaddvq_f64 (float64x2_t __a)
19738 float64x2_t t = __builtin_aarch64_addvv2df (__a);
19739 return vgetq_lane_f64 (t, 0);
19742 /* vceq */
19744 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19745 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
19747 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
19748 (int8x8_t) __b);
19751 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19752 vceq_s8 (int8x8_t __a, int8x8_t __b)
19754 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
19757 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19758 vceq_s16 (int16x4_t __a, int16x4_t __b)
19760 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
19763 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19764 vceq_s32 (int32x2_t __a, int32x2_t __b)
19766 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
19769 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19770 vceq_s64 (int64x1_t __a, int64x1_t __b)
19772 return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
19775 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19776 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
19778 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
19779 (int8x8_t) __b);
19782 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19783 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
19785 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
19786 (int16x4_t) __b);
19789 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19790 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
19792 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
19793 (int32x2_t) __b);
19796 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19797 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
19799 return (uint64x1_t) __builtin_aarch64_cmeqdi ((int64x1_t) __a,
19800 (int64x1_t) __b);
19803 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19804 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
19806 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
19807 (int8x16_t) __b);
19810 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19811 vceqq_s8 (int8x16_t __a, int8x16_t __b)
19813 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
19816 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19817 vceqq_s16 (int16x8_t __a, int16x8_t __b)
19819 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
19822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19823 vceqq_s32 (int32x4_t __a, int32x4_t __b)
19825 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
19828 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19829 vceqq_s64 (int64x2_t __a, int64x2_t __b)
19831 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
19834 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19835 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
19837 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
19838 (int8x16_t) __b);
19841 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19842 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
19844 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
19845 (int16x8_t) __b);
19848 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19849 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
19851 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
19852 (int32x4_t) __b);
19855 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19856 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
19858 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
19859 (int64x2_t) __b);
19862 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19863 vceqd_s64 (int64x1_t __a, int64x1_t __b)
19865 return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
19868 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19869 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
19871 return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, __b);
19874 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19875 vceqzd_s64 (int64x1_t __a)
19877 return (uint64x1_t) __builtin_aarch64_cmeqdi (__a, 0);
19880 /* vcge */
19882 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19883 vcge_s8 (int8x8_t __a, int8x8_t __b)
19885 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
19888 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19889 vcge_s16 (int16x4_t __a, int16x4_t __b)
19891 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
19894 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19895 vcge_s32 (int32x2_t __a, int32x2_t __b)
19897 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
19900 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19901 vcge_s64 (int64x1_t __a, int64x1_t __b)
19903 return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
19906 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19907 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
19909 return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __a,
19910 (int8x8_t) __b);
19913 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19914 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
19916 return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __a,
19917 (int16x4_t) __b);
19920 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19921 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
19923 return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __a,
19924 (int32x2_t) __b);
19927 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19928 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
19930 return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
19931 (int64x1_t) __b);
19934 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19935 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
19937 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
19940 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19941 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
19943 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
19946 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19947 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
19949 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
19952 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19953 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
19955 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
19958 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19959 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
19961 return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __a,
19962 (int8x16_t) __b);
19965 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19966 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
19968 return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __a,
19969 (int16x8_t) __b);
19972 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19973 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
19975 return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __a,
19976 (int32x4_t) __b);
19979 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19980 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
19982 return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __a,
19983 (int64x2_t) __b);
19986 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19987 vcged_s64 (int64x1_t __a, int64x1_t __b)
19989 return (uint64x1_t) __builtin_aarch64_cmgedi (__a, __b);
19992 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19993 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
19995 return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __a,
19996 (int64x1_t) __b);
19999 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20000 vcgezd_s64 (int64x1_t __a)
20002 return (uint64x1_t) __builtin_aarch64_cmgedi (__a, 0);
20005 /* vcgt */
20007 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20008 vcgt_s8 (int8x8_t __a, int8x8_t __b)
20010 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
20013 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20014 vcgt_s16 (int16x4_t __a, int16x4_t __b)
20016 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
20019 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20020 vcgt_s32 (int32x2_t __a, int32x2_t __b)
20022 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
20025 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20026 vcgt_s64 (int64x1_t __a, int64x1_t __b)
20028 return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
20031 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20032 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
20034 return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __a,
20035 (int8x8_t) __b);
20038 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20039 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
20041 return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __a,
20042 (int16x4_t) __b);
20045 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20046 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
20048 return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __a,
20049 (int32x2_t) __b);
20052 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20053 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
20055 return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
20056 (int64x1_t) __b);
20059 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20060 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
20062 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
20065 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20066 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
20068 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
20071 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20072 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
20074 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
20077 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20078 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
20080 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
20083 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20084 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
20086 return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __a,
20087 (int8x16_t) __b);
20090 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20091 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
20093 return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __a,
20094 (int16x8_t) __b);
20097 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20098 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
20100 return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __a,
20101 (int32x4_t) __b);
20104 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20105 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
20107 return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __a,
20108 (int64x2_t) __b);
20111 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20112 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
20114 return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, __b);
20117 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20118 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
20120 return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __a,
20121 (int64x1_t) __b);
20124 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20125 vcgtzd_s64 (int64x1_t __a)
20127 return (uint64x1_t) __builtin_aarch64_cmgtdi (__a, 0);
20130 /* vcle */
20132 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20133 vcle_s8 (int8x8_t __a, int8x8_t __b)
20135 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
20138 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20139 vcle_s16 (int16x4_t __a, int16x4_t __b)
20141 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
20144 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20145 vcle_s32 (int32x2_t __a, int32x2_t __b)
20147 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
20150 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20151 vcle_s64 (int64x1_t __a, int64x1_t __b)
20153 return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
20156 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20157 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
20159 return (uint8x8_t) __builtin_aarch64_cmhsv8qi ((int8x8_t) __b,
20160 (int8x8_t) __a);
20163 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20164 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
20166 return (uint16x4_t) __builtin_aarch64_cmhsv4hi ((int16x4_t) __b,
20167 (int16x4_t) __a);
20170 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20171 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
20173 return (uint32x2_t) __builtin_aarch64_cmhsv2si ((int32x2_t) __b,
20174 (int32x2_t) __a);
20177 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20178 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
20180 return (uint64x1_t) __builtin_aarch64_cmhsdi ((int64x1_t) __b,
20181 (int64x1_t) __a);
20184 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20185 vcleq_s8 (int8x16_t __a, int8x16_t __b)
20187 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
20190 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20191 vcleq_s16 (int16x8_t __a, int16x8_t __b)
20193 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
20196 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20197 vcleq_s32 (int32x4_t __a, int32x4_t __b)
20199 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
20202 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20203 vcleq_s64 (int64x2_t __a, int64x2_t __b)
20205 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
20208 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20209 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
20211 return (uint8x16_t) __builtin_aarch64_cmhsv16qi ((int8x16_t) __b,
20212 (int8x16_t) __a);
20215 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20216 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
20218 return (uint16x8_t) __builtin_aarch64_cmhsv8hi ((int16x8_t) __b,
20219 (int16x8_t) __a);
20222 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20223 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
20225 return (uint32x4_t) __builtin_aarch64_cmhsv4si ((int32x4_t) __b,
20226 (int32x4_t) __a);
20229 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20230 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
20232 return (uint64x2_t) __builtin_aarch64_cmhsv2di ((int64x2_t) __b,
20233 (int64x2_t) __a);
20236 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20237 vcled_s64 (int64x1_t __a, int64x1_t __b)
20239 return (uint64x1_t) __builtin_aarch64_cmgedi (__b, __a);
20242 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20243 vclezd_s64 (int64x1_t __a)
20245 return (uint64x1_t) __builtin_aarch64_cmledi (__a, 0);
20248 /* vclt */
20250 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20251 vclt_s8 (int8x8_t __a, int8x8_t __b)
20253 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
20256 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20257 vclt_s16 (int16x4_t __a, int16x4_t __b)
20259 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
20262 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20263 vclt_s32 (int32x2_t __a, int32x2_t __b)
20265 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
20268 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20269 vclt_s64 (int64x1_t __a, int64x1_t __b)
20271 return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
20274 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20275 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
20277 return (uint8x8_t) __builtin_aarch64_cmhiv8qi ((int8x8_t) __b,
20278 (int8x8_t) __a);
20281 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20282 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
20284 return (uint16x4_t) __builtin_aarch64_cmhiv4hi ((int16x4_t) __b,
20285 (int16x4_t) __a);
20288 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20289 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
20291 return (uint32x2_t) __builtin_aarch64_cmhiv2si ((int32x2_t) __b,
20292 (int32x2_t) __a);
20295 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20296 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
20298 return (uint64x1_t) __builtin_aarch64_cmhidi ((int64x1_t) __b,
20299 (int64x1_t) __a);
20302 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20303 vcltq_s8 (int8x16_t __a, int8x16_t __b)
20305 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
20308 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20309 vcltq_s16 (int16x8_t __a, int16x8_t __b)
20311 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
20314 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20315 vcltq_s32 (int32x4_t __a, int32x4_t __b)
20317 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
20320 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20321 vcltq_s64 (int64x2_t __a, int64x2_t __b)
20323 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
20326 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20327 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
20329 return (uint8x16_t) __builtin_aarch64_cmhiv16qi ((int8x16_t) __b,
20330 (int8x16_t) __a);
20333 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20334 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
20336 return (uint16x8_t) __builtin_aarch64_cmhiv8hi ((int16x8_t) __b,
20337 (int16x8_t) __a);
20340 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20341 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
20343 return (uint32x4_t) __builtin_aarch64_cmhiv4si ((int32x4_t) __b,
20344 (int32x4_t) __a);
20347 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20348 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
20350 return (uint64x2_t) __builtin_aarch64_cmhiv2di ((int64x2_t) __b,
20351 (int64x2_t) __a);
20354 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20355 vcltd_s64 (int64x1_t __a, int64x1_t __b)
20357 return (uint64x1_t) __builtin_aarch64_cmgtdi (__b, __a);
20360 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20361 vcltzd_s64 (int64x1_t __a)
20363 return (uint64x1_t) __builtin_aarch64_cmltdi (__a, 0);
20366 /* vdup */
20368 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
20369 vdupb_lane_s8 (int8x16_t a, int const b)
20371 return __builtin_aarch64_dup_laneqi (a, b);
20374 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
20375 vdupb_lane_u8 (uint8x16_t a, int const b)
20377 return (uint8x1_t) __builtin_aarch64_dup_laneqi ((int8x16_t) a, b);
20380 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
20381 vduph_lane_s16 (int16x8_t a, int const b)
20383 return __builtin_aarch64_dup_lanehi (a, b);
20386 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
20387 vduph_lane_u16 (uint16x8_t a, int const b)
20389 return (uint16x1_t) __builtin_aarch64_dup_lanehi ((int16x8_t) a, b);
20392 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
20393 vdups_lane_s32 (int32x4_t a, int const b)
20395 return __builtin_aarch64_dup_lanesi (a, b);
20398 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
20399 vdups_lane_u32 (uint32x4_t a, int const b)
20401 return (uint32x1_t) __builtin_aarch64_dup_lanesi ((int32x4_t) a, b);
20404 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
20405 vdupd_lane_s64 (int64x2_t a, int const b)
20407 return __builtin_aarch64_dup_lanedi (a, b);
20410 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20411 vdupd_lane_u64 (uint64x2_t a, int const b)
20413 return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b);
20416 /* vldn */
20418 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
20419 vld2_s64 (const int64_t * __a)
20421 int64x1x2_t ret;
20422 __builtin_aarch64_simd_oi __o;
20423 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
20424 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
20425 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
20426 return ret;
20429 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
20430 vld2_u64 (const uint64_t * __a)
20432 uint64x1x2_t ret;
20433 __builtin_aarch64_simd_oi __o;
20434 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
20435 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
20436 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
20437 return ret;
20440 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
20441 vld2_f64 (const float64_t * __a)
20443 float64x1x2_t ret;
20444 __builtin_aarch64_simd_oi __o;
20445 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
20446 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
20447 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
20448 return ret;
20451 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
20452 vld2_s8 (const int8_t * __a)
20454 int8x8x2_t ret;
20455 __builtin_aarch64_simd_oi __o;
20456 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
20457 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
20458 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
20459 return ret;
20462 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
20463 vld2_p8 (const poly8_t * __a)
20465 poly8x8x2_t ret;
20466 __builtin_aarch64_simd_oi __o;
20467 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
20468 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
20469 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
20470 return ret;
20473 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
20474 vld2_s16 (const int16_t * __a)
20476 int16x4x2_t ret;
20477 __builtin_aarch64_simd_oi __o;
20478 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20479 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20480 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20481 return ret;
20484 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
20485 vld2_p16 (const poly16_t * __a)
20487 poly16x4x2_t ret;
20488 __builtin_aarch64_simd_oi __o;
20489 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20490 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20491 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20492 return ret;
20495 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
20496 vld2_s32 (const int32_t * __a)
20498 int32x2x2_t ret;
20499 __builtin_aarch64_simd_oi __o;
20500 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
20501 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
20502 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
20503 return ret;
20506 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
20507 vld2_u8 (const uint8_t * __a)
20509 uint8x8x2_t ret;
20510 __builtin_aarch64_simd_oi __o;
20511 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
20512 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
20513 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
20514 return ret;
20517 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
20518 vld2_u16 (const uint16_t * __a)
20520 uint16x4x2_t ret;
20521 __builtin_aarch64_simd_oi __o;
20522 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20523 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20524 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20525 return ret;
20528 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
20529 vld2_u32 (const uint32_t * __a)
20531 uint32x2x2_t ret;
20532 __builtin_aarch64_simd_oi __o;
20533 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
20534 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
20535 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
20536 return ret;
20539 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
20540 vld2_f32 (const float32_t * __a)
20542 float32x2x2_t ret;
20543 __builtin_aarch64_simd_oi __o;
20544 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
20545 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
20546 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
20547 return ret;
20550 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
20551 vld2q_s8 (const int8_t * __a)
20553 int8x16x2_t ret;
20554 __builtin_aarch64_simd_oi __o;
20555 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20556 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20557 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20558 return ret;
20561 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
20562 vld2q_p8 (const poly8_t * __a)
20564 poly8x16x2_t ret;
20565 __builtin_aarch64_simd_oi __o;
20566 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20567 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20568 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20569 return ret;
20572 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
20573 vld2q_s16 (const int16_t * __a)
20575 int16x8x2_t ret;
20576 __builtin_aarch64_simd_oi __o;
20577 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20578 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20579 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20580 return ret;
20583 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
20584 vld2q_p16 (const poly16_t * __a)
20586 poly16x8x2_t ret;
20587 __builtin_aarch64_simd_oi __o;
20588 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20589 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20590 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20591 return ret;
20594 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
20595 vld2q_s32 (const int32_t * __a)
20597 int32x4x2_t ret;
20598 __builtin_aarch64_simd_oi __o;
20599 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
20600 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
20601 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
20602 return ret;
20605 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
20606 vld2q_s64 (const int64_t * __a)
20608 int64x2x2_t ret;
20609 __builtin_aarch64_simd_oi __o;
20610 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
20611 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
20612 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
20613 return ret;
20616 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
20617 vld2q_u8 (const uint8_t * __a)
20619 uint8x16x2_t ret;
20620 __builtin_aarch64_simd_oi __o;
20621 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20622 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20623 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20624 return ret;
20627 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
20628 vld2q_u16 (const uint16_t * __a)
20630 uint16x8x2_t ret;
20631 __builtin_aarch64_simd_oi __o;
20632 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20633 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20634 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20635 return ret;
20638 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
20639 vld2q_u32 (const uint32_t * __a)
20641 uint32x4x2_t ret;
20642 __builtin_aarch64_simd_oi __o;
20643 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
20644 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
20645 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
20646 return ret;
20649 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
20650 vld2q_u64 (const uint64_t * __a)
20652 uint64x2x2_t ret;
20653 __builtin_aarch64_simd_oi __o;
20654 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
20655 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
20656 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
20657 return ret;
20660 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
20661 vld2q_f32 (const float32_t * __a)
20663 float32x4x2_t ret;
20664 __builtin_aarch64_simd_oi __o;
20665 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
20666 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
20667 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
20668 return ret;
20671 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
20672 vld2q_f64 (const float64_t * __a)
20674 float64x2x2_t ret;
20675 __builtin_aarch64_simd_oi __o;
20676 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
20677 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
20678 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
20679 return ret;
20682 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
20683 vld3_s64 (const int64_t * __a)
20685 int64x1x3_t ret;
20686 __builtin_aarch64_simd_ci __o;
20687 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20688 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20689 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20690 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20691 return ret;
20694 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
20695 vld3_u64 (const uint64_t * __a)
20697 uint64x1x3_t ret;
20698 __builtin_aarch64_simd_ci __o;
20699 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20700 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20701 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20702 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20703 return ret;
20706 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
20707 vld3_f64 (const float64_t * __a)
20709 float64x1x3_t ret;
20710 __builtin_aarch64_simd_ci __o;
20711 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
20712 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
20713 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
20714 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
20715 return ret;
20718 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
20719 vld3_s8 (const int8_t * __a)
20721 int8x8x3_t ret;
20722 __builtin_aarch64_simd_ci __o;
20723 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20724 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20725 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20726 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20727 return ret;
20730 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
20731 vld3_p8 (const poly8_t * __a)
20733 poly8x8x3_t ret;
20734 __builtin_aarch64_simd_ci __o;
20735 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20736 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20737 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20738 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20739 return ret;
20742 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
20743 vld3_s16 (const int16_t * __a)
20745 int16x4x3_t ret;
20746 __builtin_aarch64_simd_ci __o;
20747 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20748 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20749 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20750 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20751 return ret;
20754 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
20755 vld3_p16 (const poly16_t * __a)
20757 poly16x4x3_t ret;
20758 __builtin_aarch64_simd_ci __o;
20759 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20760 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20761 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20762 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20763 return ret;
20766 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
20767 vld3_s32 (const int32_t * __a)
20769 int32x2x3_t ret;
20770 __builtin_aarch64_simd_ci __o;
20771 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20772 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20773 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20774 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20775 return ret;
20778 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
20779 vld3_u8 (const uint8_t * __a)
20781 uint8x8x3_t ret;
20782 __builtin_aarch64_simd_ci __o;
20783 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20784 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20785 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20786 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20787 return ret;
20790 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
20791 vld3_u16 (const uint16_t * __a)
20793 uint16x4x3_t ret;
20794 __builtin_aarch64_simd_ci __o;
20795 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20796 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20797 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20798 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20799 return ret;
20802 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
20803 vld3_u32 (const uint32_t * __a)
20805 uint32x2x3_t ret;
20806 __builtin_aarch64_simd_ci __o;
20807 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20808 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20809 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20810 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20811 return ret;
20814 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
20815 vld3_f32 (const float32_t * __a)
20817 float32x2x3_t ret;
20818 __builtin_aarch64_simd_ci __o;
20819 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
20820 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
20821 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
20822 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
20823 return ret;
20826 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
20827 vld3q_s8 (const int8_t * __a)
20829 int8x16x3_t ret;
20830 __builtin_aarch64_simd_ci __o;
20831 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20832 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20833 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20834 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20835 return ret;
20838 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
20839 vld3q_p8 (const poly8_t * __a)
20841 poly8x16x3_t ret;
20842 __builtin_aarch64_simd_ci __o;
20843 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20844 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20845 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20846 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20847 return ret;
20850 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
20851 vld3q_s16 (const int16_t * __a)
20853 int16x8x3_t ret;
20854 __builtin_aarch64_simd_ci __o;
20855 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20856 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20857 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20858 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20859 return ret;
20862 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
20863 vld3q_p16 (const poly16_t * __a)
20865 poly16x8x3_t ret;
20866 __builtin_aarch64_simd_ci __o;
20867 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20868 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20869 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20870 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20871 return ret;
20874 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
20875 vld3q_s32 (const int32_t * __a)
20877 int32x4x3_t ret;
20878 __builtin_aarch64_simd_ci __o;
20879 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20880 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20881 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20882 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20883 return ret;
20886 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
20887 vld3q_s64 (const int64_t * __a)
20889 int64x2x3_t ret;
20890 __builtin_aarch64_simd_ci __o;
20891 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20892 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20893 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20894 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20895 return ret;
20898 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
20899 vld3q_u8 (const uint8_t * __a)
20901 uint8x16x3_t ret;
20902 __builtin_aarch64_simd_ci __o;
20903 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20904 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20905 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20906 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20907 return ret;
20910 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
20911 vld3q_u16 (const uint16_t * __a)
20913 uint16x8x3_t ret;
20914 __builtin_aarch64_simd_ci __o;
20915 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20916 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20917 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20918 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20919 return ret;
20922 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
20923 vld3q_u32 (const uint32_t * __a)
20925 uint32x4x3_t ret;
20926 __builtin_aarch64_simd_ci __o;
20927 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20928 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20929 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20930 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20931 return ret;
20934 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
20935 vld3q_u64 (const uint64_t * __a)
20937 uint64x2x3_t ret;
20938 __builtin_aarch64_simd_ci __o;
20939 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20940 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20941 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20942 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20943 return ret;
20946 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
20947 vld3q_f32 (const float32_t * __a)
20949 float32x4x3_t ret;
20950 __builtin_aarch64_simd_ci __o;
20951 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
20952 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
20953 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
20954 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
20955 return ret;
20958 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
20959 vld3q_f64 (const float64_t * __a)
20961 float64x2x3_t ret;
20962 __builtin_aarch64_simd_ci __o;
20963 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
20964 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
20965 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
20966 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
20967 return ret;
20970 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
20971 vld4_s64 (const int64_t * __a)
20973 int64x1x4_t ret;
20974 __builtin_aarch64_simd_xi __o;
20975 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20976 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20977 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20978 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20979 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20980 return ret;
20983 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
20984 vld4_u64 (const uint64_t * __a)
20986 uint64x1x4_t ret;
20987 __builtin_aarch64_simd_xi __o;
20988 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20989 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20990 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20991 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20992 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20993 return ret;
20996 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
20997 vld4_f64 (const float64_t * __a)
20999 float64x1x4_t ret;
21000 __builtin_aarch64_simd_xi __o;
21001 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
21002 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
21003 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
21004 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
21005 ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
21006 return ret;
21009 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
21010 vld4_s8 (const int8_t * __a)
21012 int8x8x4_t ret;
21013 __builtin_aarch64_simd_xi __o;
21014 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
21015 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
21016 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
21017 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
21018 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
21019 return ret;
21022 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
21023 vld4_p8 (const poly8_t * __a)
21025 poly8x8x4_t ret;
21026 __builtin_aarch64_simd_xi __o;
21027 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
21028 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
21029 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
21030 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
21031 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
21032 return ret;
21035 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
21036 vld4_s16 (const int16_t * __a)
21038 int16x4x4_t ret;
21039 __builtin_aarch64_simd_xi __o;
21040 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
21041 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
21042 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
21043 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
21044 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
21045 return ret;
21048 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
21049 vld4_p16 (const poly16_t * __a)
21051 poly16x4x4_t ret;
21052 __builtin_aarch64_simd_xi __o;
21053 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
21054 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
21055 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
21056 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
21057 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
21058 return ret;
21061 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
21062 vld4_s32 (const int32_t * __a)
21064 int32x2x4_t ret;
21065 __builtin_aarch64_simd_xi __o;
21066 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
21067 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
21068 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
21069 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
21070 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
21071 return ret;
21074 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
21075 vld4_u8 (const uint8_t * __a)
21077 uint8x8x4_t ret;
21078 __builtin_aarch64_simd_xi __o;
21079 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
21080 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
21081 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
21082 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
21083 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
21084 return ret;
21087 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
21088 vld4_u16 (const uint16_t * __a)
21090 uint16x4x4_t ret;
21091 __builtin_aarch64_simd_xi __o;
21092 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
21093 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
21094 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
21095 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
21096 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
21097 return ret;
21100 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
21101 vld4_u32 (const uint32_t * __a)
21103 uint32x2x4_t ret;
21104 __builtin_aarch64_simd_xi __o;
21105 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
21106 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
21107 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
21108 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
21109 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
21110 return ret;
21113 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
21114 vld4_f32 (const float32_t * __a)
21116 float32x2x4_t ret;
21117 __builtin_aarch64_simd_xi __o;
21118 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
21119 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
21120 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
21121 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
21122 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
21123 return ret;
21126 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
21127 vld4q_s8 (const int8_t * __a)
21129 int8x16x4_t ret;
21130 __builtin_aarch64_simd_xi __o;
21131 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
21132 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
21133 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
21134 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
21135 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
21136 return ret;
21139 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
21140 vld4q_p8 (const poly8_t * __a)
21142 poly8x16x4_t ret;
21143 __builtin_aarch64_simd_xi __o;
21144 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
21145 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
21146 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
21147 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
21148 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
21149 return ret;
21152 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
21153 vld4q_s16 (const int16_t * __a)
21155 int16x8x4_t ret;
21156 __builtin_aarch64_simd_xi __o;
21157 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
21158 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
21159 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
21160 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
21161 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
21162 return ret;
21165 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
21166 vld4q_p16 (const poly16_t * __a)
21168 poly16x8x4_t ret;
21169 __builtin_aarch64_simd_xi __o;
21170 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
21171 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
21172 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
21173 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
21174 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
21175 return ret;
21178 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
21179 vld4q_s32 (const int32_t * __a)
21181 int32x4x4_t ret;
21182 __builtin_aarch64_simd_xi __o;
21183 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
21184 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
21185 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
21186 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
21187 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
21188 return ret;
21191 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
21192 vld4q_s64 (const int64_t * __a)
21194 int64x2x4_t ret;
21195 __builtin_aarch64_simd_xi __o;
21196 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
21197 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
21198 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
21199 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
21200 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
21201 return ret;
21204 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
21205 vld4q_u8 (const uint8_t * __a)
21207 uint8x16x4_t ret;
21208 __builtin_aarch64_simd_xi __o;
21209 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
21210 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
21211 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
21212 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
21213 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
21214 return ret;
21217 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
21218 vld4q_u16 (const uint16_t * __a)
21220 uint16x8x4_t ret;
21221 __builtin_aarch64_simd_xi __o;
21222 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
21223 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
21224 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
21225 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
21226 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
21227 return ret;
21230 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
21231 vld4q_u32 (const uint32_t * __a)
21233 uint32x4x4_t ret;
21234 __builtin_aarch64_simd_xi __o;
21235 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
21236 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
21237 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
21238 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
21239 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
21240 return ret;
21243 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
21244 vld4q_u64 (const uint64_t * __a)
21246 uint64x2x4_t ret;
21247 __builtin_aarch64_simd_xi __o;
21248 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
21249 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
21250 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
21251 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
21252 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
21253 return ret;
21256 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
21257 vld4q_f32 (const float32_t * __a)
21259 float32x4x4_t ret;
21260 __builtin_aarch64_simd_xi __o;
21261 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
21262 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
21263 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
21264 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
21265 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
21266 return ret;
21269 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
21270 vld4q_f64 (const float64_t * __a)
21272 float64x2x4_t ret;
21273 __builtin_aarch64_simd_xi __o;
21274 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
21275 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
21276 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
21277 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
21278 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
21279 return ret;
21282 /* vmax */
21284 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21285 vmax_f32 (float32x2_t __a, float32x2_t __b)
21287 return __builtin_aarch64_fmaxv2sf (__a, __b);
21290 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21291 vmax_s8 (int8x8_t __a, int8x8_t __b)
21293 return __builtin_aarch64_smaxv8qi (__a, __b);
21296 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21297 vmax_s16 (int16x4_t __a, int16x4_t __b)
21299 return __builtin_aarch64_smaxv4hi (__a, __b);
21302 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21303 vmax_s32 (int32x2_t __a, int32x2_t __b)
21305 return __builtin_aarch64_smaxv2si (__a, __b);
21308 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21309 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
21311 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
21312 (int8x8_t) __b);
21315 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21316 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
21318 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
21319 (int16x4_t) __b);
21322 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21323 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
21325 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
21326 (int32x2_t) __b);
21329 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21330 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
21332 return __builtin_aarch64_fmaxv4sf (__a, __b);
21335 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21336 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
21338 return __builtin_aarch64_fmaxv2df (__a, __b);
21341 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21342 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
21344 return __builtin_aarch64_smaxv16qi (__a, __b);
21347 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21348 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
21350 return __builtin_aarch64_smaxv8hi (__a, __b);
21353 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21354 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
21356 return __builtin_aarch64_smaxv4si (__a, __b);
21359 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21360 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
21362 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
21363 (int8x16_t) __b);
21366 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21367 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
21369 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
21370 (int16x8_t) __b);
21373 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21374 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
21376 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
21377 (int32x4_t) __b);
21380 /* vmin */
21382 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21383 vmin_f32 (float32x2_t __a, float32x2_t __b)
21385 return __builtin_aarch64_fminv2sf (__a, __b);
21388 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21389 vmin_s8 (int8x8_t __a, int8x8_t __b)
21391 return __builtin_aarch64_sminv8qi (__a, __b);
21394 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21395 vmin_s16 (int16x4_t __a, int16x4_t __b)
21397 return __builtin_aarch64_sminv4hi (__a, __b);
21400 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21401 vmin_s32 (int32x2_t __a, int32x2_t __b)
21403 return __builtin_aarch64_sminv2si (__a, __b);
21406 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21407 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
21409 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
21410 (int8x8_t) __b);
21413 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21414 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
21416 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
21417 (int16x4_t) __b);
21420 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21421 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
21423 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
21424 (int32x2_t) __b);
21427 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21428 vminq_f32 (float32x4_t __a, float32x4_t __b)
21430 return __builtin_aarch64_fminv4sf (__a, __b);
21433 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21434 vminq_f64 (float64x2_t __a, float64x2_t __b)
21436 return __builtin_aarch64_fminv2df (__a, __b);
21439 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21440 vminq_s8 (int8x16_t __a, int8x16_t __b)
21442 return __builtin_aarch64_sminv16qi (__a, __b);
21445 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21446 vminq_s16 (int16x8_t __a, int16x8_t __b)
21448 return __builtin_aarch64_sminv8hi (__a, __b);
21451 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21452 vminq_s32 (int32x4_t __a, int32x4_t __b)
21454 return __builtin_aarch64_sminv4si (__a, __b);
21457 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21458 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
21460 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
21461 (int8x16_t) __b);
21464 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21465 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
21467 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
21468 (int16x8_t) __b);
21471 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21472 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
21474 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
21475 (int32x4_t) __b);
21478 /* vmla */
21480 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21481 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21483 return a + b * c;
21486 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21487 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21489 return a + b * c;
21492 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21493 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21495 return a + b * c;
21498 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21499 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21501 return a - b * c;
21504 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21505 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21507 return a - b * c;
21510 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21511 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21513 return a - b * c;
21516 /* vqabs */
21518 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21519 vqabsq_s64 (int64x2_t __a)
21521 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
21524 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21525 vqabsb_s8 (int8x1_t __a)
21527 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
21530 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21531 vqabsh_s16 (int16x1_t __a)
21533 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
21536 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21537 vqabss_s32 (int32x1_t __a)
21539 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
21542 /* vqadd */
21544 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21545 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
21547 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
21550 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21551 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
21553 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
21556 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21557 vqadds_s32 (int32x1_t __a, int32x1_t __b)
21559 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
21562 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21563 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
21565 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
21568 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21569 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
21571 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
21574 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21575 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
21577 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
21580 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21581 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
21583 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
21586 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21587 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
21589 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
21592 /* vqdmlal */
21594 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21595 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21597 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21600 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21601 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21603 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21606 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21607 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21608 int const __d)
21610 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21613 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21614 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21615 int const __d)
21617 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21620 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21621 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21623 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21626 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21627 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21629 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
21630 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
21633 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21634 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21636 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21639 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21640 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21642 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21645 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21646 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21648 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21651 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21652 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21654 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21657 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21658 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21659 int const __d)
21661 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21664 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21665 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21666 int const __d)
21668 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21671 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21672 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21674 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21677 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21678 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21680 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
21681 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
21684 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21685 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21687 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21690 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21691 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21693 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21696 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21697 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21699 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21702 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21703 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21705 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21708 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21709 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21711 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21714 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21715 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21717 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21720 /* vqdmlsl */
21722 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21723 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21725 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
21728 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21729 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21731 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
21734 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21735 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21736 int const __d)
21738 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
21741 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21742 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21743 int const __d)
21745 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
21748 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21749 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21751 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
21754 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21755 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21757 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
21758 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
21761 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21762 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21764 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
21767 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21768 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21770 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
21773 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21774 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21776 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
21779 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21780 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21782 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
21785 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21786 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21787 int const __d)
21789 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
21792 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21793 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21794 int const __d)
21796 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
21799 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21800 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21802 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
21805 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21806 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21808 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
21809 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
21812 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21813 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21815 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
21818 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21819 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21821 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
21824 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21825 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21827 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
21830 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21831 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21833 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
21836 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21837 vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21839 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
21842 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21843 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21845 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
21848 /* vqdmulh */
21850 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21851 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21853 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
21856 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21857 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21859 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
21862 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21863 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21865 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
21868 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21869 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21871 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
21874 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21875 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21877 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
21880 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21881 vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21883 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
21886 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21887 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21889 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
21892 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21893 vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21895 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
21898 /* vqdmull */
21900 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21901 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
21903 return __builtin_aarch64_sqdmullv4hi (__a, __b);
21906 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21907 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
21909 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
21912 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21913 vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21915 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
21918 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21919 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21921 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
21924 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21925 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
21927 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
21930 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21931 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
21933 int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
21934 return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
21937 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21938 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
21940 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
21943 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21944 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
21946 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
21949 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21950 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
21952 return __builtin_aarch64_sqdmullv2si (__a, __b);
21955 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21956 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
21958 return __builtin_aarch64_sqdmull2v4si (__a, __b);
21961 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21962 vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21964 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
21967 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21968 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21970 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
21973 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21974 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
21976 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
21979 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21980 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
21982 int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
21983 return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
21986 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21987 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
21989 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
21992 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21993 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
21995 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
21998 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21999 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
22001 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
22004 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22005 vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
22007 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
22010 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22011 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
22013 return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
22016 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22017 vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
22019 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
22022 /* vqmovn */
22024 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22025 vqmovn_s16 (int16x8_t __a)
22027 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
22030 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22031 vqmovn_s32 (int32x4_t __a)
22033 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
22036 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22037 vqmovn_s64 (int64x2_t __a)
22039 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
22042 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22043 vqmovn_u16 (uint16x8_t __a)
22045 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
22048 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22049 vqmovn_u32 (uint32x4_t __a)
22051 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
22054 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22055 vqmovn_u64 (uint64x2_t __a)
22057 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
22060 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22061 vqmovnh_s16 (int16x1_t __a)
22063 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
22066 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22067 vqmovns_s32 (int32x1_t __a)
22069 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
22072 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22073 vqmovnd_s64 (int64x1_t __a)
22075 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
22078 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22079 vqmovnh_u16 (uint16x1_t __a)
22081 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
22084 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22085 vqmovns_u32 (uint32x1_t __a)
22087 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
22090 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22091 vqmovnd_u64 (uint64x1_t __a)
22093 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
22096 /* vqmovun */
22098 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22099 vqmovun_s16 (int16x8_t __a)
22101 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
22104 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22105 vqmovun_s32 (int32x4_t __a)
22107 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
22110 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22111 vqmovun_s64 (int64x2_t __a)
22113 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
22116 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22117 vqmovunh_s16 (int16x1_t __a)
22119 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
22122 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22123 vqmovuns_s32 (int32x1_t __a)
22125 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
22128 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22129 vqmovund_s64 (int64x1_t __a)
22131 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
22134 /* vqneg */
22136 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22137 vqnegq_s64 (int64x2_t __a)
22139 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
22142 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22143 vqnegb_s8 (int8x1_t __a)
22145 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
22148 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22149 vqnegh_s16 (int16x1_t __a)
22151 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
22154 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22155 vqnegs_s32 (int32x1_t __a)
22157 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
22160 /* vqrdmulh */
22162 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22163 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22165 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
22168 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22169 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22171 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
22174 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22175 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
22177 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
22180 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22181 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
22183 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
22186 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22187 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
22189 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
22192 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22193 vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
22195 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
22198 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22199 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
22201 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
22204 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22205 vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
22207 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
22210 /* vqrshl */
22212 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22213 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
22215 return __builtin_aarch64_sqrshlv8qi (__a, __b);
22218 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22219 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
22221 return __builtin_aarch64_sqrshlv4hi (__a, __b);
22224 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22225 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
22227 return __builtin_aarch64_sqrshlv2si (__a, __b);
22230 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22231 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
22233 return __builtin_aarch64_sqrshldi (__a, __b);
22236 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22237 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
22239 return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
22242 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22243 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
22245 return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
22248 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22249 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
22251 return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
22254 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22255 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
22257 return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
22260 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22261 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
22263 return __builtin_aarch64_sqrshlv16qi (__a, __b);
22266 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22267 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
22269 return __builtin_aarch64_sqrshlv8hi (__a, __b);
22272 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22273 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
22275 return __builtin_aarch64_sqrshlv4si (__a, __b);
22278 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22279 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
22281 return __builtin_aarch64_sqrshlv2di (__a, __b);
22284 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22285 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22287 return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
22290 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22291 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22293 return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
22296 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22297 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22299 return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
22302 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22303 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22305 return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
22308 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22309 vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
22311 return __builtin_aarch64_sqrshlqi (__a, __b);
22314 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22315 vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
22317 return __builtin_aarch64_sqrshlhi (__a, __b);
22320 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22321 vqrshls_s32 (int32x1_t __a, int32x1_t __b)
22323 return __builtin_aarch64_sqrshlsi (__a, __b);
22326 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22327 vqrshld_s64 (int64x1_t __a, int64x1_t __b)
22329 return __builtin_aarch64_sqrshldi (__a, __b);
22332 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22333 vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22335 return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
22338 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22339 vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22341 return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
22344 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22345 vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
22347 return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
22350 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22351 vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22353 return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
22356 /* vqrshrn */
22358 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22359 vqrshrn_n_s16 (int16x8_t __a, const int __b)
22361 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
22364 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22365 vqrshrn_n_s32 (int32x4_t __a, const int __b)
22367 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
22370 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22371 vqrshrn_n_s64 (int64x2_t __a, const int __b)
22373 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
22376 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22377 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
22379 return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
22382 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22383 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
22385 return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
22388 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22389 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
22391 return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
22394 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22395 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
22397 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
22400 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22401 vqrshrns_n_s32 (int32x1_t __a, const int __b)
22403 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
22406 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22407 vqrshrnd_n_s64 (int64x1_t __a, const int __b)
22409 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
22412 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22413 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
22415 return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
22418 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22419 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
22421 return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
22424 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22425 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
22427 return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
22430 /* vqrshrun */
22432 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22433 vqrshrun_n_s16 (int16x8_t __a, const int __b)
22435 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
22438 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22439 vqrshrun_n_s32 (int32x4_t __a, const int __b)
22441 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
22444 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22445 vqrshrun_n_s64 (int64x2_t __a, const int __b)
22447 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
22450 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22451 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
22453 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
22456 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22457 vqrshruns_n_s32 (int32x1_t __a, const int __b)
22459 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
22462 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22463 vqrshrund_n_s64 (int64x1_t __a, const int __b)
22465 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
22468 /* vqshl */
22470 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22471 vqshl_s8 (int8x8_t __a, int8x8_t __b)
22473 return __builtin_aarch64_sqshlv8qi (__a, __b);
22476 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22477 vqshl_s16 (int16x4_t __a, int16x4_t __b)
22479 return __builtin_aarch64_sqshlv4hi (__a, __b);
22482 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22483 vqshl_s32 (int32x2_t __a, int32x2_t __b)
22485 return __builtin_aarch64_sqshlv2si (__a, __b);
22488 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22489 vqshl_s64 (int64x1_t __a, int64x1_t __b)
22491 return __builtin_aarch64_sqshldi (__a, __b);
22494 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22495 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
22497 return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
22500 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22501 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
22503 return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
22506 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22507 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
22509 return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
22512 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22513 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
22515 return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
22518 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22519 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
22521 return __builtin_aarch64_sqshlv16qi (__a, __b);
22524 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22525 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
22527 return __builtin_aarch64_sqshlv8hi (__a, __b);
22530 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22531 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
22533 return __builtin_aarch64_sqshlv4si (__a, __b);
22536 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22537 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
22539 return __builtin_aarch64_sqshlv2di (__a, __b);
22542 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22543 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
22545 return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
22548 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22549 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
22551 return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
22554 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22555 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
22557 return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
22560 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22561 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
22563 return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
22566 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22567 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
22569 return __builtin_aarch64_sqshlqi (__a, __b);
22572 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22573 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
22575 return __builtin_aarch64_sqshlhi (__a, __b);
22578 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22579 vqshls_s32 (int32x1_t __a, int32x1_t __b)
22581 return __builtin_aarch64_sqshlsi (__a, __b);
22584 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22585 vqshld_s64 (int64x1_t __a, int64x1_t __b)
22587 return __builtin_aarch64_sqshldi (__a, __b);
22590 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22591 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22593 return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
22596 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22597 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22599 return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
22602 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22603 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22605 return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22608 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22609 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22611 return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22614 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22615 vqshl_n_s8 (int8x8_t __a, const int __b)
22617 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22620 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22621 vqshl_n_s16 (int16x4_t __a, const int __b)
22623 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22626 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22627 vqshl_n_s32 (int32x2_t __a, const int __b)
22629 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22632 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22633 vqshl_n_s64 (int64x1_t __a, const int __b)
22635 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22638 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22639 vqshl_n_u8 (uint8x8_t __a, const int __b)
22641 return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22644 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22645 vqshl_n_u16 (uint16x4_t __a, const int __b)
22647 return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22650 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22651 vqshl_n_u32 (uint32x2_t __a, const int __b)
22653 return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22656 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22657 vqshl_n_u64 (uint64x1_t __a, const int __b)
22659 return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22662 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22663 vqshlq_n_s8 (int8x16_t __a, const int __b)
22665 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22668 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22669 vqshlq_n_s16 (int16x8_t __a, const int __b)
22671 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22674 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22675 vqshlq_n_s32 (int32x4_t __a, const int __b)
22677 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22680 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22681 vqshlq_n_s64 (int64x2_t __a, const int __b)
22683 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22686 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22687 vqshlq_n_u8 (uint8x16_t __a, const int __b)
22689 return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22692 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22693 vqshlq_n_u16 (uint16x8_t __a, const int __b)
22695 return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22698 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22699 vqshlq_n_u32 (uint32x4_t __a, const int __b)
22701 return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22704 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22705 vqshlq_n_u64 (uint64x2_t __a, const int __b)
22707 return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22710 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22711 vqshlb_n_s8 (int8x1_t __a, const int __b)
22713 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22716 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22717 vqshlh_n_s16 (int16x1_t __a, const int __b)
22719 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22722 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22723 vqshls_n_s32 (int32x1_t __a, const int __b)
22725 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22728 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22729 vqshld_n_s64 (int64x1_t __a, const int __b)
22731 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22734 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22735 vqshlb_n_u8 (uint8x1_t __a, const int __b)
22737 return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22740 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22741 vqshlh_n_u16 (uint16x1_t __a, const int __b)
22743 return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22746 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22747 vqshls_n_u32 (uint32x1_t __a, const int __b)
22749 return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22752 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22753 vqshld_n_u64 (uint64x1_t __a, const int __b)
22755 return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22758 /* vqshlu */
22760 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22761 vqshlu_n_s8 (int8x8_t __a, const int __b)
22763 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22766 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22767 vqshlu_n_s16 (int16x4_t __a, const int __b)
22769 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22772 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22773 vqshlu_n_s32 (int32x2_t __a, const int __b)
22775 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22778 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22779 vqshlu_n_s64 (int64x1_t __a, const int __b)
22781 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22784 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22785 vqshluq_n_s8 (int8x16_t __a, const int __b)
22787 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22790 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22791 vqshluq_n_s16 (int16x8_t __a, const int __b)
22793 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22796 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22797 vqshluq_n_s32 (int32x4_t __a, const int __b)
22799 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22802 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22803 vqshluq_n_s64 (int64x2_t __a, const int __b)
22805 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22808 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22809 vqshlub_n_s8 (int8x1_t __a, const int __b)
22811 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22814 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22815 vqshluh_n_s16 (int16x1_t __a, const int __b)
22817 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22820 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22821 vqshlus_n_s32 (int32x1_t __a, const int __b)
22823 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22826 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22827 vqshlud_n_s64 (int64x1_t __a, const int __b)
22829 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22832 /* vqshrn */
22834 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22835 vqshrn_n_s16 (int16x8_t __a, const int __b)
22837 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22840 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22841 vqshrn_n_s32 (int32x4_t __a, const int __b)
22843 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22846 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22847 vqshrn_n_s64 (int64x2_t __a, const int __b)
22849 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22852 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22853 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22855 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22858 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22859 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22861 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22864 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22865 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22867 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22870 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22871 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22873 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22876 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22877 vqshrns_n_s32 (int32x1_t __a, const int __b)
22879 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22882 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22883 vqshrnd_n_s64 (int64x1_t __a, const int __b)
22885 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22888 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22889 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22891 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22894 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22895 vqshrns_n_u32 (uint32x1_t __a, const int __b)
22897 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22900 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22901 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22903 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22906 /* vqshrun */
22908 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22909 vqshrun_n_s16 (int16x8_t __a, const int __b)
22911 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22914 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22915 vqshrun_n_s32 (int32x4_t __a, const int __b)
22917 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22920 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22921 vqshrun_n_s64 (int64x2_t __a, const int __b)
22923 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22926 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22927 vqshrunh_n_s16 (int16x1_t __a, const int __b)
22929 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22932 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22933 vqshruns_n_s32 (int32x1_t __a, const int __b)
22935 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22938 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22939 vqshrund_n_s64 (int64x1_t __a, const int __b)
22941 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22944 /* vqsub */
22946 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22947 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22949 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22952 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22953 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22955 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22958 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22959 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22961 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22964 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22965 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22967 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22970 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22971 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22973 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22976 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22977 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22979 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22982 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22983 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22985 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22988 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22989 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22991 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
22994 /* vrecpe */
22996 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22997 vrecpes_f32 (float32_t __a)
22999 return __builtin_aarch64_frecpesf (__a);
23002 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
23003 vrecped_f64 (float64_t __a)
23005 return __builtin_aarch64_frecpedf (__a);
23008 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23009 vrecpe_f32 (float32x2_t __a)
23011 return __builtin_aarch64_frecpev2sf (__a);
23014 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23015 vrecpeq_f32 (float32x4_t __a)
23017 return __builtin_aarch64_frecpev4sf (__a);
23020 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23021 vrecpeq_f64 (float64x2_t __a)
23023 return __builtin_aarch64_frecpev2df (__a);
23026 /* vrecps */
23028 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
23029 vrecpss_f32 (float32_t __a, float32_t __b)
23031 return __builtin_aarch64_frecpssf (__a, __b);
23034 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
23035 vrecpsd_f64 (float64_t __a, float64_t __b)
23037 return __builtin_aarch64_frecpsdf (__a, __b);
23040 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23041 vrecps_f32 (float32x2_t __a, float32x2_t __b)
23043 return __builtin_aarch64_frecpsv2sf (__a, __b);
23046 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23047 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
23049 return __builtin_aarch64_frecpsv4sf (__a, __b);
23052 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23053 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
23055 return __builtin_aarch64_frecpsv2df (__a, __b);
23058 /* vrecpx */
23060 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
23061 vrecpxs_f32 (float32_t __a)
23063 return __builtin_aarch64_frecpxsf (__a);
23066 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
23067 vrecpxd_f64 (float64_t __a)
23069 return __builtin_aarch64_frecpxdf (__a);
23072 /* vrshl */
23074 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23075 vrshl_s8 (int8x8_t __a, int8x8_t __b)
23077 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
23080 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23081 vrshl_s16 (int16x4_t __a, int16x4_t __b)
23083 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
23086 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23087 vrshl_s32 (int32x2_t __a, int32x2_t __b)
23089 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
23092 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23093 vrshl_s64 (int64x1_t __a, int64x1_t __b)
23095 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23098 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23099 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
23101 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
23104 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23105 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
23107 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
23110 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23111 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
23113 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
23116 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23117 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
23119 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
23122 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23123 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
23125 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
23128 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23129 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
23131 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
23134 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23135 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
23137 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
23140 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23141 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
23143 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
23146 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23147 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
23149 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
23152 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23153 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
23155 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
23158 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23159 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
23161 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
23164 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23165 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
23167 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
23170 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23171 vrshld_s64 (int64x1_t __a, int64x1_t __b)
23173 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23176 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23177 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
23179 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
/* vrshr: rounding shift right by an immediate.  The count __b is
   declared `const int'; the builtin expansion is what enforces that it
   is a compile-time constant in the valid immediate range (not visible
   in this header).  Unsigned variants cast through the signed vector
   types on which the builtins are declared.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshrd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
}
/* vrsra: rounding shift right by an immediate and accumulate
   (__a + round_shift_right (__b, __c), per the builtin's operand
   order).  __c must expand to a valid immediate; the builtin enforces
   this.  Unsigned variants cast both vector operands to the signed
   types the builtins are declared on.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
						    (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
						     (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
						     (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
						   (int64x1_t) __b, __c);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
						      (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
						     (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
						     (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
						     (int64x2_t) __b, __c);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
}
/* vshl */

/* vshl_n / vshlq_n / vshld_n: shift left by an immediate.  Signedness
   is irrelevant for a left shift, so both signed and unsigned variants
   use the single `ashl' builtin, with the unsigned forms casting their
   operand and result.  __b must expand to a valid immediate; the
   builtin expansion enforces the range.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshld_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
}
/* vshl / vshlq / vshld with a register count: shift left by per-lane
   counts in __b.  Distinct signed/unsigned builtins (sshl/ushl) are
   used here, unlike the immediate forms above; negative counts
   presumably select a right shift, per the ACLE spec -- confirm
   there.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshld_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshld_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
}
/* vshll_high_n / vshll_n: widening shift left by an immediate.  The
   result has elements twice the input width; the `2' builtins operate
   on the high half of a 128-bit source.  Signed forms need no result
   cast because the builtin already returns the widened signed type.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}

/* 64-bit source (whole D-register) forms.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
}
/* vshr */

/* vshr_n / vshrq_n / vshrd_n: shift right by an immediate.  Signed
   variants use the arithmetic-shift builtin (ashr), unsigned variants
   the logical one (lshr), with casts through the signed vector types
   the builtins are declared on.  __b must expand to a valid immediate;
   the builtin expansion enforces the range.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshrd_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
}
/* vsli: shift left by an immediate and insert into __a (SLI-style
   insert, per the ssli/usli builtin names).  __c must expand to a
   valid immediate; the builtin expansion enforces the range.  Unsigned
   variants cast both vector operands to the signed types the builtins
   are declared on.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
						   (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
						    (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
						    (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
						  (int64x1_t) __b, __c);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
						     (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
						    (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
						    (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
						    (int64x2_t) __b, __c);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
}
/* vsqadd: unsigned saturating add of a signed value (usqadd builtins).
   The destination/result is unsigned, the addend signed.  The __b
   casts in the 64-bit forms are no-ops (the builtin parameter is
   already the signed vector type) but are kept for uniformity.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
						   (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
						    (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
						    (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
						     (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
						    (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
						    (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
						    (int64x2_t) __b);
}

/* Scalar forms; the x1 element types are plain integer typedefs.  */

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
{
  return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
}
24009 /* vsqrt */
24010 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24011 vsqrt_f32 (float32x2_t a)
24013 return __builtin_aarch64_sqrtv2sf (a);
24016 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24017 vsqrtq_f32 (float32x4_t a)
24019 return __builtin_aarch64_sqrtv4sf (a);
24022 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24023 vsqrtq_f64 (float64x2_t a)
24025 return __builtin_aarch64_sqrtv2df (a);
/* vsra: shift right by an immediate and accumulate
   (__a + shift_right (__b, __c), per the builtin's operand order).
   __c must expand to a valid immediate; the builtin expansion enforces
   the range.  Unsigned variants cast both vector operands to the
   signed types the builtins are declared on.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
						   (int8x8_t) __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
						    (int16x4_t) __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
						    (int32x2_t) __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
						  (int64x1_t) __b, __c);
}

/* 128-bit (Q-register) forms.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
						     (int8x16_t) __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
						    (int16x8_t) __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
						    (int32x4_t) __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
						    (int64x2_t) __b, __c);
}

/* Scalar (D-register) forms.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
}
24146 /* vsri */
24148 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24149 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
24151 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
24154 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24155 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
24157 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
24160 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24161 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
24163 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
24166 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24167 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24169 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24172 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24173 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
24175 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
24176 (int8x8_t) __b, __c);
24179 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24180 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24182 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
24183 (int16x4_t) __b, __c);
24186 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24187 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24189 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
24190 (int32x2_t) __b, __c);
24193 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24194 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24196 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
24197 (int64x1_t) __b, __c);
24200 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24201 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24203 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
24206 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24207 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24209 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
24212 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24213 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24215 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
24218 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24219 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24221 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
24224 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24225 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24227 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
24228 (int8x16_t) __b, __c);
24231 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24232 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24234 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
24235 (int16x8_t) __b, __c);
24238 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24239 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24241 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
24242 (int32x4_t) __b, __c);
24245 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24246 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24248 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
24249 (int64x2_t) __b, __c);
24252 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24253 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24255 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24258 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24259 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24261 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
24264 /* vstn */
24266 __extension__ static __inline void
24267 vst2_s64 (int64_t * __a, int64x1x2_t val)
24269 __builtin_aarch64_simd_oi __o;
24270 int64x2x2_t temp;
24271 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24272 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24273 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24274 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24275 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24278 __extension__ static __inline void
24279 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24281 __builtin_aarch64_simd_oi __o;
24282 uint64x2x2_t temp;
24283 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24284 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24285 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24286 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24287 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24290 __extension__ static __inline void
24291 vst2_f64 (float64_t * __a, float64x1x2_t val)
24293 __builtin_aarch64_simd_oi __o;
24294 float64x2x2_t temp;
24295 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24296 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24297 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24298 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24299 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24302 __extension__ static __inline void
24303 vst2_s8 (int8_t * __a, int8x8x2_t val)
24305 __builtin_aarch64_simd_oi __o;
24306 int8x16x2_t temp;
24307 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24308 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24309 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24310 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24311 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24314 __extension__ static __inline void __attribute__ ((__always_inline__))
24315 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24317 __builtin_aarch64_simd_oi __o;
24318 poly8x16x2_t temp;
24319 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24320 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24321 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24322 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24323 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24326 __extension__ static __inline void __attribute__ ((__always_inline__))
24327 vst2_s16 (int16_t * __a, int16x4x2_t val)
24329 __builtin_aarch64_simd_oi __o;
24330 int16x8x2_t temp;
24331 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24332 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24333 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24334 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24335 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24338 __extension__ static __inline void __attribute__ ((__always_inline__))
24339 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24341 __builtin_aarch64_simd_oi __o;
24342 poly16x8x2_t temp;
24343 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24344 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24345 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24346 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24347 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24350 __extension__ static __inline void __attribute__ ((__always_inline__))
24351 vst2_s32 (int32_t * __a, int32x2x2_t val)
24353 __builtin_aarch64_simd_oi __o;
24354 int32x4x2_t temp;
24355 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24356 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24357 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24358 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24359 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24362 __extension__ static __inline void __attribute__ ((__always_inline__))
24363 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24365 __builtin_aarch64_simd_oi __o;
24366 uint8x16x2_t temp;
24367 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24368 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24369 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24370 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24371 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24374 __extension__ static __inline void __attribute__ ((__always_inline__))
24375 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24377 __builtin_aarch64_simd_oi __o;
24378 uint16x8x2_t temp;
24379 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24380 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24381 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24382 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24383 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24386 __extension__ static __inline void __attribute__ ((__always_inline__))
24387 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24389 __builtin_aarch64_simd_oi __o;
24390 uint32x4x2_t temp;
24391 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24392 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24393 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24394 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24395 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24398 __extension__ static __inline void __attribute__ ((__always_inline__))
24399 vst2_f32 (float32_t * __a, float32x2x2_t val)
24401 __builtin_aarch64_simd_oi __o;
24402 float32x4x2_t temp;
24403 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24404 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24405 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24406 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24407 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24410 __extension__ static __inline void __attribute__ ((__always_inline__))
24411 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24413 __builtin_aarch64_simd_oi __o;
24414 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24415 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24416 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24419 __extension__ static __inline void __attribute__ ((__always_inline__))
24420 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24422 __builtin_aarch64_simd_oi __o;
24423 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24424 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24425 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24428 __extension__ static __inline void __attribute__ ((__always_inline__))
24429 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24431 __builtin_aarch64_simd_oi __o;
24432 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24433 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24434 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24437 __extension__ static __inline void __attribute__ ((__always_inline__))
24438 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24440 __builtin_aarch64_simd_oi __o;
24441 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24442 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24443 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24446 __extension__ static __inline void __attribute__ ((__always_inline__))
24447 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24449 __builtin_aarch64_simd_oi __o;
24450 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24451 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24452 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24455 __extension__ static __inline void __attribute__ ((__always_inline__))
24456 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24458 __builtin_aarch64_simd_oi __o;
24459 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24460 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24461 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24464 __extension__ static __inline void __attribute__ ((__always_inline__))
24465 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24467 __builtin_aarch64_simd_oi __o;
24468 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24469 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24470 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24473 __extension__ static __inline void __attribute__ ((__always_inline__))
24474 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24476 __builtin_aarch64_simd_oi __o;
24477 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24478 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24479 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24482 __extension__ static __inline void __attribute__ ((__always_inline__))
24483 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24485 __builtin_aarch64_simd_oi __o;
24486 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24487 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24488 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24491 __extension__ static __inline void __attribute__ ((__always_inline__))
24492 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24494 __builtin_aarch64_simd_oi __o;
24495 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24496 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24497 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24500 __extension__ static __inline void __attribute__ ((__always_inline__))
24501 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24503 __builtin_aarch64_simd_oi __o;
24504 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24505 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24506 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24509 __extension__ static __inline void __attribute__ ((__always_inline__))
24510 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24512 __builtin_aarch64_simd_oi __o;
24513 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24514 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24515 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24518 __extension__ static __inline void
24519 vst3_s64 (int64_t * __a, int64x1x3_t val)
24521 __builtin_aarch64_simd_ci __o;
24522 int64x2x3_t temp;
24523 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24524 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24525 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24526 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24527 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24528 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24529 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24532 __extension__ static __inline void
24533 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24535 __builtin_aarch64_simd_ci __o;
24536 uint64x2x3_t temp;
24537 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24538 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24539 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24540 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24541 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24542 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24543 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24546 __extension__ static __inline void
24547 vst3_f64 (float64_t * __a, float64x1x3_t val)
24549 __builtin_aarch64_simd_ci __o;
24550 float64x2x3_t temp;
24551 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24552 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24553 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24554 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24555 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24556 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24557 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24560 __extension__ static __inline void
24561 vst3_s8 (int8_t * __a, int8x8x3_t val)
24563 __builtin_aarch64_simd_ci __o;
24564 int8x16x3_t temp;
24565 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24566 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24567 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
24568 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24569 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24570 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24571 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24574 __extension__ static __inline void __attribute__ ((__always_inline__))
24575 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24577 __builtin_aarch64_simd_ci __o;
24578 poly8x16x3_t temp;
24579 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24580 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24581 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
24582 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24583 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24584 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24585 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24588 __extension__ static __inline void __attribute__ ((__always_inline__))
24589 vst3_s16 (int16_t * __a, int16x4x3_t val)
24591 __builtin_aarch64_simd_ci __o;
24592 int16x8x3_t temp;
24593 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24594 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24595 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
24596 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24597 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24598 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24599 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24602 __extension__ static __inline void __attribute__ ((__always_inline__))
24603 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24605 __builtin_aarch64_simd_ci __o;
24606 poly16x8x3_t temp;
24607 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24608 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24609 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
24610 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24611 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24612 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24613 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24616 __extension__ static __inline void __attribute__ ((__always_inline__))
24617 vst3_s32 (int32_t * __a, int32x2x3_t val)
24619 __builtin_aarch64_simd_ci __o;
24620 int32x4x3_t temp;
24621 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24622 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24623 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
24624 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24625 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24626 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24627 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24630 __extension__ static __inline void __attribute__ ((__always_inline__))
24631 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24633 __builtin_aarch64_simd_ci __o;
24634 uint8x16x3_t temp;
24635 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24636 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24637 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
24638 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24639 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24640 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24641 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24644 __extension__ static __inline void __attribute__ ((__always_inline__))
24645 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24647 __builtin_aarch64_simd_ci __o;
24648 uint16x8x3_t temp;
24649 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24650 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24651 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
24652 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24653 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24654 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24655 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24658 __extension__ static __inline void __attribute__ ((__always_inline__))
24659 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24661 __builtin_aarch64_simd_ci __o;
24662 uint32x4x3_t temp;
24663 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24664 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24665 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
24666 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24667 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24668 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24669 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24672 __extension__ static __inline void __attribute__ ((__always_inline__))
24673 vst3_f32 (float32_t * __a, float32x2x3_t val)
24675 __builtin_aarch64_simd_ci __o;
24676 float32x4x3_t temp;
24677 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24678 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24679 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
24680 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24681 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24682 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24683 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24686 __extension__ static __inline void __attribute__ ((__always_inline__))
24687 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24689 __builtin_aarch64_simd_ci __o;
24690 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24691 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24692 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24693 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24696 __extension__ static __inline void __attribute__ ((__always_inline__))
24697 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24699 __builtin_aarch64_simd_ci __o;
24700 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24701 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24702 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24703 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24706 __extension__ static __inline void __attribute__ ((__always_inline__))
24707 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24709 __builtin_aarch64_simd_ci __o;
24710 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24711 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24712 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24713 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24716 __extension__ static __inline void __attribute__ ((__always_inline__))
24717 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24719 __builtin_aarch64_simd_ci __o;
24720 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24721 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24722 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24723 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24726 __extension__ static __inline void __attribute__ ((__always_inline__))
24727 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24729 __builtin_aarch64_simd_ci __o;
24730 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24731 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24732 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24733 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24736 __extension__ static __inline void __attribute__ ((__always_inline__))
24737 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24739 __builtin_aarch64_simd_ci __o;
24740 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24741 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24742 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24743 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24746 __extension__ static __inline void __attribute__ ((__always_inline__))
24747 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24749 __builtin_aarch64_simd_ci __o;
24750 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24751 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24752 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24753 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24756 __extension__ static __inline void __attribute__ ((__always_inline__))
24757 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24759 __builtin_aarch64_simd_ci __o;
24760 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24761 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24762 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24763 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24766 __extension__ static __inline void __attribute__ ((__always_inline__))
24767 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24769 __builtin_aarch64_simd_ci __o;
24770 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24771 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24772 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24773 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24776 __extension__ static __inline void __attribute__ ((__always_inline__))
24777 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24779 __builtin_aarch64_simd_ci __o;
24780 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24781 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24782 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24783 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24786 __extension__ static __inline void __attribute__ ((__always_inline__))
24787 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24789 __builtin_aarch64_simd_ci __o;
24790 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24791 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24792 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24793 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24796 __extension__ static __inline void __attribute__ ((__always_inline__))
24797 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24799 __builtin_aarch64_simd_ci __o;
24800 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24801 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24802 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24803 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24806 __extension__ static __inline void
24807 vst4_s64 (int64_t * __a, int64x1x4_t val)
24809 __builtin_aarch64_simd_xi __o;
24810 int64x2x4_t temp;
24811 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24812 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24813 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24814 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0)));
24815 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24816 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24817 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24818 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24819 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24822 __extension__ static __inline void
24823 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24825 __builtin_aarch64_simd_xi __o;
24826 uint64x2x4_t temp;
24827 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24828 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24829 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24830 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0)));
24831 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24832 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24833 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24834 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24835 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24838 __extension__ static __inline void
24839 vst4_f64 (float64_t * __a, float64x1x4_t val)
24841 __builtin_aarch64_simd_xi __o;
24842 float64x2x4_t temp;
24843 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24844 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24845 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24846 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (UINT64_C (0)));
24847 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24848 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24849 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24850 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24851 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24854 __extension__ static __inline void
24855 vst4_s8 (int8_t * __a, int8x8x4_t val)
24857 __builtin_aarch64_simd_xi __o;
24858 int8x16x4_t temp;
24859 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24860 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24861 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
24862 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0)));
24863 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24864 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24865 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24866 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24867 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24870 __extension__ static __inline void __attribute__ ((__always_inline__))
24871 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
24873 __builtin_aarch64_simd_xi __o;
24874 poly8x16x4_t temp;
24875 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24876 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24877 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
24878 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0)));
24879 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24880 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24881 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24882 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24883 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24886 __extension__ static __inline void __attribute__ ((__always_inline__))
24887 vst4_s16 (int16_t * __a, int16x4x4_t val)
24889 __builtin_aarch64_simd_xi __o;
24890 int16x8x4_t temp;
24891 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24892 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24893 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
24894 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0)));
24895 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24896 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24897 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24898 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24899 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24902 __extension__ static __inline void __attribute__ ((__always_inline__))
24903 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
24905 __builtin_aarch64_simd_xi __o;
24906 poly16x8x4_t temp;
24907 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24908 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24909 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
24910 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (UINT64_C (0)));
24911 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24912 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24913 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24914 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24915 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24918 __extension__ static __inline void __attribute__ ((__always_inline__))
24919 vst4_s32 (int32_t * __a, int32x2x4_t val)
24921 __builtin_aarch64_simd_xi __o;
24922 int32x4x4_t temp;
24923 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24924 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24925 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
24926 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0)));
24927 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24928 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24929 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24930 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24931 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24934 __extension__ static __inline void __attribute__ ((__always_inline__))
24935 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
24937 __builtin_aarch64_simd_xi __o;
24938 uint8x16x4_t temp;
24939 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24940 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24941 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
24942 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0)));
24943 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
24944 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
24945 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
24946 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
24947 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24950 __extension__ static __inline void __attribute__ ((__always_inline__))
24951 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
24953 __builtin_aarch64_simd_xi __o;
24954 uint16x8x4_t temp;
24955 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24956 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24957 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
24958 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0)));
24959 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
24960 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
24961 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
24962 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
24963 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24966 __extension__ static __inline void __attribute__ ((__always_inline__))
24967 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
24969 __builtin_aarch64_simd_xi __o;
24970 uint32x4x4_t temp;
24971 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24972 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24973 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
24974 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0)));
24975 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
24976 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
24977 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
24978 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
24979 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
24982 __extension__ static __inline void __attribute__ ((__always_inline__))
24983 vst4_f32 (float32_t * __a, float32x2x4_t val)
24985 __builtin_aarch64_simd_xi __o;
24986 float32x4x4_t temp;
24987 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24988 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24989 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
24990 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0)));
24991 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
24992 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
24993 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
24994 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
24995 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24998 __extension__ static __inline void __attribute__ ((__always_inline__))
24999 vst4q_s8 (int8_t * __a, int8x16x4_t val)
25001 __builtin_aarch64_simd_xi __o;
25002 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25003 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25004 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25005 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25006 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25009 __extension__ static __inline void __attribute__ ((__always_inline__))
25010 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
25012 __builtin_aarch64_simd_xi __o;
25013 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25014 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25015 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25016 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25017 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25020 __extension__ static __inline void __attribute__ ((__always_inline__))
25021 vst4q_s16 (int16_t * __a, int16x8x4_t val)
25023 __builtin_aarch64_simd_xi __o;
25024 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25025 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25026 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25027 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25028 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25031 __extension__ static __inline void __attribute__ ((__always_inline__))
25032 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
25034 __builtin_aarch64_simd_xi __o;
25035 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25036 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25037 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25038 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25039 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25042 __extension__ static __inline void __attribute__ ((__always_inline__))
25043 vst4q_s32 (int32_t * __a, int32x4x4_t val)
25045 __builtin_aarch64_simd_xi __o;
25046 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25047 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25048 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25049 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25050 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25053 __extension__ static __inline void __attribute__ ((__always_inline__))
25054 vst4q_s64 (int64_t * __a, int64x2x4_t val)
25056 __builtin_aarch64_simd_xi __o;
25057 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25058 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25059 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25060 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25061 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25064 __extension__ static __inline void __attribute__ ((__always_inline__))
25065 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
25067 __builtin_aarch64_simd_xi __o;
25068 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25069 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25070 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25071 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25072 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25075 __extension__ static __inline void __attribute__ ((__always_inline__))
25076 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
25078 __builtin_aarch64_simd_xi __o;
25079 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25080 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25081 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25082 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25083 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25086 __extension__ static __inline void __attribute__ ((__always_inline__))
25087 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
25089 __builtin_aarch64_simd_xi __o;
25090 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25091 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25092 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25093 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25094 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25097 __extension__ static __inline void __attribute__ ((__always_inline__))
25098 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
25100 __builtin_aarch64_simd_xi __o;
25101 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25102 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25103 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25104 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25105 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25108 __extension__ static __inline void __attribute__ ((__always_inline__))
25109 vst4q_f32 (float32_t * __a, float32x4x4_t val)
25111 __builtin_aarch64_simd_xi __o;
25112 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
25113 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
25114 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
25115 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
25116 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
25119 __extension__ static __inline void __attribute__ ((__always_inline__))
25120 vst4q_f64 (float64_t * __a, float64x2x4_t val)
25122 __builtin_aarch64_simd_xi __o;
25123 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
25124 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
25125 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
25126 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
25127 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
/* vsub */
25132 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25133 vsubd_s64 (int64x1_t __a, int64x1_t __b)
25135 return __a - __b;
25138 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25139 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
25141 return __a - __b;
/* vtrn */
25146 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
25147 vtrn_f32 (float32x2_t a, float32x2_t b)
25149 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
25152 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
25153 vtrn_p8 (poly8x8_t a, poly8x8_t b)
25155 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
25158 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
25159 vtrn_p16 (poly16x4_t a, poly16x4_t b)
25161 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
25164 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
25165 vtrn_s8 (int8x8_t a, int8x8_t b)
25167 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25170 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
25171 vtrn_s16 (int16x4_t a, int16x4_t b)
25173 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25176 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
25177 vtrn_s32 (int32x2_t a, int32x2_t b)
25179 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25182 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
25183 vtrn_u8 (uint8x8_t a, uint8x8_t b)
25185 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25188 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
25189 vtrn_u16 (uint16x4_t a, uint16x4_t b)
25191 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25194 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
25195 vtrn_u32 (uint32x2_t a, uint32x2_t b)
25197 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25200 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
25201 vtrnq_f32 (float32x4_t a, float32x4_t b)
25203 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25206 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
25207 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25209 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25212 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
25213 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25215 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25218 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
25219 vtrnq_s8 (int8x16_t a, int8x16_t b)
25221 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25224 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
25225 vtrnq_s16 (int16x8_t a, int16x8_t b)
25227 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25230 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25231 vtrnq_s32 (int32x4_t a, int32x4_t b)
25233 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25236 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25237 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25239 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25242 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25243 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25245 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25248 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25249 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25251 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
/* vtst */
25256 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25257 vtst_s8 (int8x8_t __a, int8x8_t __b)
25259 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25262 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25263 vtst_s16 (int16x4_t __a, int16x4_t __b)
25265 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25268 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25269 vtst_s32 (int32x2_t __a, int32x2_t __b)
25271 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25274 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25275 vtst_s64 (int64x1_t __a, int64x1_t __b)
25277 return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
25280 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25281 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25283 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25284 (int8x8_t) __b);
25287 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25288 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25290 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25291 (int16x4_t) __b);
25294 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25295 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25297 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25298 (int32x2_t) __b);
25301 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25302 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25304 return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
25305 (int64x1_t) __b);
25308 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25309 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25311 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25314 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25315 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25317 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25320 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25321 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25323 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25326 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25327 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25329 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25332 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25333 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25335 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25336 (int8x16_t) __b);
25339 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25340 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25342 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25343 (int16x8_t) __b);
25346 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25347 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25349 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25350 (int32x4_t) __b);
25353 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25354 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25356 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25357 (int64x2_t) __b);
25360 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25361 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25363 return (uint64x1_t) __builtin_aarch64_cmtstdi (__a, __b);
25366 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25367 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25369 return (uint64x1_t) __builtin_aarch64_cmtstdi ((int64x1_t) __a,
25370 (int64x1_t) __b);
/* vuqadd */
25375 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25376 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25378 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25381 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25382 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25384 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25387 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25388 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25390 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25393 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25394 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25396 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25399 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25400 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25402 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25405 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25406 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25408 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25411 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25412 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25414 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25417 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25418 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25420 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25423 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
25424 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25426 return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25429 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
25430 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25432 return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25435 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
25436 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25438 return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25441 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25442 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25444 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
/* Define one interleave intrinsic v<op><Q>_<funcsuffix> that returns the
   pair {v<op>1..., v<op>2...} — used for vuzp/vzip below.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)		\
  __extension__ static __inline rettype					\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype a, intype b)			\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)};	\
  }
/* Instantiate __DEFINTERLEAVE for every supported element type, in both
   64-bit (empty Q) and 128-bit (Q = q) register widths.  */
#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)		\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)		\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)		\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)		\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)		\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)		\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)		\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25476 /* vuzp */
25478 __INTERLEAVE_LIST (uzp)
25480 /* vzip */
25482 __INTERLEAVE_LIST (zip)
25484 #undef __INTERLEAVE_LIST
25485 #undef __DEFINTERLEAVE
/* End of optimal implementations in approved order.  */

#endif