[AArch64] Fix name of macros called in the vdup_lane Neon intrinsics
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob4a480fb3da7a5dbd4976ce62b15d3a77284a6096
/* ARM NEON intrinsics include file.

   Copyright (C) 2011-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 typedef __builtin_aarch64_simd_qi int8x8_t
33 __attribute__ ((__vector_size__ (8)));
34 typedef __builtin_aarch64_simd_hi int16x4_t
35 __attribute__ ((__vector_size__ (8)));
36 typedef __builtin_aarch64_simd_si int32x2_t
37 __attribute__ ((__vector_size__ (8)));
38 typedef int64_t int64x1_t;
39 typedef int32_t int32x1_t;
40 typedef int16_t int16x1_t;
41 typedef int8_t int8x1_t;
42 typedef double float64x1_t;
43 typedef __builtin_aarch64_simd_sf float32x2_t
44 __attribute__ ((__vector_size__ (8)));
45 typedef __builtin_aarch64_simd_poly8 poly8x8_t
46 __attribute__ ((__vector_size__ (8)));
47 typedef __builtin_aarch64_simd_poly16 poly16x4_t
48 __attribute__ ((__vector_size__ (8)));
49 typedef __builtin_aarch64_simd_uqi uint8x8_t
50 __attribute__ ((__vector_size__ (8)));
51 typedef __builtin_aarch64_simd_uhi uint16x4_t
52 __attribute__ ((__vector_size__ (8)));
53 typedef __builtin_aarch64_simd_usi uint32x2_t
54 __attribute__ ((__vector_size__ (8)));
55 typedef uint64_t uint64x1_t;
56 typedef uint32_t uint32x1_t;
57 typedef uint16_t uint16x1_t;
58 typedef uint8_t uint8x1_t;
59 typedef __builtin_aarch64_simd_qi int8x16_t
60 __attribute__ ((__vector_size__ (16)));
61 typedef __builtin_aarch64_simd_hi int16x8_t
62 __attribute__ ((__vector_size__ (16)));
63 typedef __builtin_aarch64_simd_si int32x4_t
64 __attribute__ ((__vector_size__ (16)));
65 typedef __builtin_aarch64_simd_di int64x2_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_sf float32x4_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_df float64x2_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_poly8 poly8x16_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_poly16 poly16x8_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_uqi uint8x16_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_uhi uint16x8_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_usi uint32x4_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_udi uint64x2_t
82 __attribute__ ((__vector_size__ (16)));
84 typedef float float32_t;
85 typedef double float64_t;
86 typedef __builtin_aarch64_simd_poly8 poly8_t;
87 typedef __builtin_aarch64_simd_poly16 poly16_t;
89 typedef struct int8x8x2_t
91 int8x8_t val[2];
92 } int8x8x2_t;
94 typedef struct int8x16x2_t
96 int8x16_t val[2];
97 } int8x16x2_t;
99 typedef struct int16x4x2_t
101 int16x4_t val[2];
102 } int16x4x2_t;
104 typedef struct int16x8x2_t
106 int16x8_t val[2];
107 } int16x8x2_t;
109 typedef struct int32x2x2_t
111 int32x2_t val[2];
112 } int32x2x2_t;
114 typedef struct int32x4x2_t
116 int32x4_t val[2];
117 } int32x4x2_t;
119 typedef struct int64x1x2_t
121 int64x1_t val[2];
122 } int64x1x2_t;
124 typedef struct int64x2x2_t
126 int64x2_t val[2];
127 } int64x2x2_t;
129 typedef struct uint8x8x2_t
131 uint8x8_t val[2];
132 } uint8x8x2_t;
134 typedef struct uint8x16x2_t
136 uint8x16_t val[2];
137 } uint8x16x2_t;
139 typedef struct uint16x4x2_t
141 uint16x4_t val[2];
142 } uint16x4x2_t;
144 typedef struct uint16x8x2_t
146 uint16x8_t val[2];
147 } uint16x8x2_t;
149 typedef struct uint32x2x2_t
151 uint32x2_t val[2];
152 } uint32x2x2_t;
154 typedef struct uint32x4x2_t
156 uint32x4_t val[2];
157 } uint32x4x2_t;
159 typedef struct uint64x1x2_t
161 uint64x1_t val[2];
162 } uint64x1x2_t;
164 typedef struct uint64x2x2_t
166 uint64x2_t val[2];
167 } uint64x2x2_t;
169 typedef struct float32x2x2_t
171 float32x2_t val[2];
172 } float32x2x2_t;
174 typedef struct float32x4x2_t
176 float32x4_t val[2];
177 } float32x4x2_t;
179 typedef struct float64x2x2_t
181 float64x2_t val[2];
182 } float64x2x2_t;
184 typedef struct float64x1x2_t
186 float64x1_t val[2];
187 } float64x1x2_t;
189 typedef struct poly8x8x2_t
191 poly8x8_t val[2];
192 } poly8x8x2_t;
194 typedef struct poly8x16x2_t
196 poly8x16_t val[2];
197 } poly8x16x2_t;
199 typedef struct poly16x4x2_t
201 poly16x4_t val[2];
202 } poly16x4x2_t;
204 typedef struct poly16x8x2_t
206 poly16x8_t val[2];
207 } poly16x8x2_t;
209 typedef struct int8x8x3_t
211 int8x8_t val[3];
212 } int8x8x3_t;
214 typedef struct int8x16x3_t
216 int8x16_t val[3];
217 } int8x16x3_t;
219 typedef struct int16x4x3_t
221 int16x4_t val[3];
222 } int16x4x3_t;
224 typedef struct int16x8x3_t
226 int16x8_t val[3];
227 } int16x8x3_t;
229 typedef struct int32x2x3_t
231 int32x2_t val[3];
232 } int32x2x3_t;
234 typedef struct int32x4x3_t
236 int32x4_t val[3];
237 } int32x4x3_t;
239 typedef struct int64x1x3_t
241 int64x1_t val[3];
242 } int64x1x3_t;
244 typedef struct int64x2x3_t
246 int64x2_t val[3];
247 } int64x2x3_t;
249 typedef struct uint8x8x3_t
251 uint8x8_t val[3];
252 } uint8x8x3_t;
254 typedef struct uint8x16x3_t
256 uint8x16_t val[3];
257 } uint8x16x3_t;
259 typedef struct uint16x4x3_t
261 uint16x4_t val[3];
262 } uint16x4x3_t;
264 typedef struct uint16x8x3_t
266 uint16x8_t val[3];
267 } uint16x8x3_t;
269 typedef struct uint32x2x3_t
271 uint32x2_t val[3];
272 } uint32x2x3_t;
274 typedef struct uint32x4x3_t
276 uint32x4_t val[3];
277 } uint32x4x3_t;
279 typedef struct uint64x1x3_t
281 uint64x1_t val[3];
282 } uint64x1x3_t;
284 typedef struct uint64x2x3_t
286 uint64x2_t val[3];
287 } uint64x2x3_t;
289 typedef struct float32x2x3_t
291 float32x2_t val[3];
292 } float32x2x3_t;
294 typedef struct float32x4x3_t
296 float32x4_t val[3];
297 } float32x4x3_t;
299 typedef struct float64x2x3_t
301 float64x2_t val[3];
302 } float64x2x3_t;
304 typedef struct float64x1x3_t
306 float64x1_t val[3];
307 } float64x1x3_t;
309 typedef struct poly8x8x3_t
311 poly8x8_t val[3];
312 } poly8x8x3_t;
314 typedef struct poly8x16x3_t
316 poly8x16_t val[3];
317 } poly8x16x3_t;
319 typedef struct poly16x4x3_t
321 poly16x4_t val[3];
322 } poly16x4x3_t;
324 typedef struct poly16x8x3_t
326 poly16x8_t val[3];
327 } poly16x8x3_t;
329 typedef struct int8x8x4_t
331 int8x8_t val[4];
332 } int8x8x4_t;
334 typedef struct int8x16x4_t
336 int8x16_t val[4];
337 } int8x16x4_t;
339 typedef struct int16x4x4_t
341 int16x4_t val[4];
342 } int16x4x4_t;
344 typedef struct int16x8x4_t
346 int16x8_t val[4];
347 } int16x8x4_t;
349 typedef struct int32x2x4_t
351 int32x2_t val[4];
352 } int32x2x4_t;
354 typedef struct int32x4x4_t
356 int32x4_t val[4];
357 } int32x4x4_t;
359 typedef struct int64x1x4_t
361 int64x1_t val[4];
362 } int64x1x4_t;
364 typedef struct int64x2x4_t
366 int64x2_t val[4];
367 } int64x2x4_t;
369 typedef struct uint8x8x4_t
371 uint8x8_t val[4];
372 } uint8x8x4_t;
374 typedef struct uint8x16x4_t
376 uint8x16_t val[4];
377 } uint8x16x4_t;
379 typedef struct uint16x4x4_t
381 uint16x4_t val[4];
382 } uint16x4x4_t;
384 typedef struct uint16x8x4_t
386 uint16x8_t val[4];
387 } uint16x8x4_t;
389 typedef struct uint32x2x4_t
391 uint32x2_t val[4];
392 } uint32x2x4_t;
394 typedef struct uint32x4x4_t
396 uint32x4_t val[4];
397 } uint32x4x4_t;
399 typedef struct uint64x1x4_t
401 uint64x1_t val[4];
402 } uint64x1x4_t;
404 typedef struct uint64x2x4_t
406 uint64x2_t val[4];
407 } uint64x2x4_t;
409 typedef struct float32x2x4_t
411 float32x2_t val[4];
412 } float32x2x4_t;
414 typedef struct float32x4x4_t
416 float32x4_t val[4];
417 } float32x4x4_t;
419 typedef struct float64x2x4_t
421 float64x2_t val[4];
422 } float64x2x4_t;
424 typedef struct float64x1x4_t
426 float64x1_t val[4];
427 } float64x1x4_t;
429 typedef struct poly8x8x4_t
431 poly8x8_t val[4];
432 } poly8x8x4_t;
434 typedef struct poly8x16x4_t
436 poly8x16_t val[4];
437 } poly8x16x4_t;
439 typedef struct poly16x4x4_t
441 poly16x4_t val[4];
442 } poly16x4x4_t;
444 typedef struct poly16x8x4_t
446 poly16x8_t val[4];
447 } poly16x8x4_t;
/* vget_lane internal macros.  __aarch64_vget_lane_any dispatches to the
   per-mode get_lane builtin; __cast_ret/__cast_a insert the casts needed
   when the builtin's element type differs from the intrinsic's (unsigned
   and polynomial types are implemented on the signed builtins).  */

#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret								\
     __builtin_aarch64_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
/* A float64x1_t is a plain scalar, so lane extraction is the identity.  */
#define __aarch64_vget_lane_f64(__a, __b) (__a)

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
/* An int64x1_t is a plain scalar, so lane extraction is the identity.  */
#define __aarch64_vget_lane_s64(__a, __b) (__a)

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) (__a)

#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
508 /* vadd */
509 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
510 vadd_s8 (int8x8_t __a, int8x8_t __b)
512 return __a + __b;
515 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
516 vadd_s16 (int16x4_t __a, int16x4_t __b)
518 return __a + __b;
521 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
522 vadd_s32 (int32x2_t __a, int32x2_t __b)
524 return __a + __b;
527 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
528 vadd_f32 (float32x2_t __a, float32x2_t __b)
530 return __a + __b;
533 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
534 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
536 return __a + __b;
539 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
540 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
542 return __a + __b;
545 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
546 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
548 return __a + __b;
551 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
552 vadd_s64 (int64x1_t __a, int64x1_t __b)
554 return __a + __b;
557 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
558 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
560 return __a + __b;
563 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
564 vaddq_s8 (int8x16_t __a, int8x16_t __b)
566 return __a + __b;
569 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
570 vaddq_s16 (int16x8_t __a, int16x8_t __b)
572 return __a + __b;
575 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
576 vaddq_s32 (int32x4_t __a, int32x4_t __b)
578 return __a + __b;
581 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
582 vaddq_s64 (int64x2_t __a, int64x2_t __b)
584 return __a + __b;
587 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
588 vaddq_f32 (float32x4_t __a, float32x4_t __b)
590 return __a + __b;
593 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
594 vaddq_f64 (float64x2_t __a, float64x2_t __b)
596 return __a + __b;
599 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
600 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
602 return __a + __b;
605 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
606 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
608 return __a + __b;
611 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
612 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
614 return __a + __b;
617 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
618 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
620 return __a + __b;
623 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
624 vaddl_s8 (int8x8_t __a, int8x8_t __b)
626 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
629 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
630 vaddl_s16 (int16x4_t __a, int16x4_t __b)
632 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
635 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
636 vaddl_s32 (int32x2_t __a, int32x2_t __b)
638 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
641 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
642 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
644 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
645 (int8x8_t) __b);
648 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
649 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
651 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
652 (int16x4_t) __b);
655 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
656 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
658 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
659 (int32x2_t) __b);
662 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
663 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
665 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
668 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
669 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
671 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
674 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
675 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
677 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
680 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
681 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
683 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
684 (int8x16_t) __b);
687 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
688 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
690 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
691 (int16x8_t) __b);
694 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
695 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
697 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
698 (int32x4_t) __b);
701 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
702 vaddw_s8 (int16x8_t __a, int8x8_t __b)
704 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
707 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
708 vaddw_s16 (int32x4_t __a, int16x4_t __b)
710 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
713 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
714 vaddw_s32 (int64x2_t __a, int32x2_t __b)
716 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
719 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
720 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
722 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
723 (int8x8_t) __b);
726 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
727 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
729 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
730 (int16x4_t) __b);
733 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
734 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
736 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
737 (int32x2_t) __b);
740 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
741 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
743 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
746 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
747 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
749 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
752 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
753 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
755 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
758 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
759 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
761 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
762 (int8x16_t) __b);
765 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
766 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
768 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
769 (int16x8_t) __b);
772 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
773 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
775 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
776 (int32x4_t) __b);
779 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
780 vhadd_s8 (int8x8_t __a, int8x8_t __b)
782 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
785 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
786 vhadd_s16 (int16x4_t __a, int16x4_t __b)
788 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
791 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
792 vhadd_s32 (int32x2_t __a, int32x2_t __b)
794 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
797 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
798 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
800 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
801 (int8x8_t) __b);
804 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
805 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
807 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
808 (int16x4_t) __b);
811 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
812 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
814 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
815 (int32x2_t) __b);
818 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
819 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
821 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
824 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
825 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
827 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
830 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
831 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
833 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
836 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
837 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
839 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
840 (int8x16_t) __b);
843 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
844 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
846 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
847 (int16x8_t) __b);
850 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
851 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
853 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
854 (int32x4_t) __b);
857 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
858 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
860 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
863 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
864 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
866 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
869 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
870 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
872 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
875 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
876 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
878 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
879 (int8x8_t) __b);
882 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
883 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
885 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
886 (int16x4_t) __b);
889 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
890 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
892 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
893 (int32x2_t) __b);
896 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
897 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
899 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
902 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
903 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
905 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
908 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
909 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
911 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
914 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
915 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
917 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
918 (int8x16_t) __b);
921 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
922 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
924 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
925 (int16x8_t) __b);
928 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
929 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
931 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
932 (int32x4_t) __b);
935 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
936 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
938 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
941 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
942 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
944 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
947 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
948 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
950 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
953 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
954 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
956 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
957 (int16x8_t) __b);
960 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
961 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
963 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
964 (int32x4_t) __b);
967 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
968 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
970 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
971 (int64x2_t) __b);
974 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
975 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
977 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
980 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
981 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
983 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
986 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
987 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
989 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
992 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
993 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
995 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
996 (int16x8_t) __b);
999 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1000 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1002 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1003 (int32x4_t) __b);
1006 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1007 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1009 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1010 (int64x2_t) __b);
1013 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1014 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1016 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1019 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1020 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1022 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1025 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1026 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1028 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1031 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1032 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1034 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1035 (int16x8_t) __b,
1036 (int16x8_t) __c);
1039 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1040 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1042 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1043 (int32x4_t) __b,
1044 (int32x4_t) __c);
1047 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1048 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1050 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1051 (int64x2_t) __b,
1052 (int64x2_t) __c);
1055 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1056 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1058 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1061 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1062 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1064 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1067 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1068 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1070 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1073 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1074 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1076 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1077 (int16x8_t) __b,
1078 (int16x8_t) __c);
1081 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1082 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1084 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1085 (int32x4_t) __b,
1086 (int32x4_t) __c);
1089 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1090 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1092 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1093 (int64x2_t) __b,
1094 (int64x2_t) __c);
1097 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1098 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1100 return __a / __b;
1103 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1104 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1106 return __a / __b;
1109 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1110 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1112 return __a / __b;
1115 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1116 vmul_s8 (int8x8_t __a, int8x8_t __b)
1118 return __a * __b;
1121 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1122 vmul_s16 (int16x4_t __a, int16x4_t __b)
1124 return __a * __b;
1127 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1128 vmul_s32 (int32x2_t __a, int32x2_t __b)
1130 return __a * __b;
1133 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1134 vmul_f32 (float32x2_t __a, float32x2_t __b)
1136 return __a * __b;
1139 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1140 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1142 return __a * __b;
1145 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1146 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1148 return __a * __b;
1151 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1152 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1154 return __a * __b;
1157 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1158 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1160 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1161 (int8x8_t) __b);
1164 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1165 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1167 return __a * __b;
1170 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1171 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1173 return __a * __b;
1176 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1177 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1179 return __a * __b;
1182 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1183 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1185 return __a * __b;
1188 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1189 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1191 return __a * __b;
1194 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1195 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1197 return __a * __b;
1200 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1201 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1203 return __a * __b;
1206 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1207 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1209 return __a * __b;
1212 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1213 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1215 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1216 (int8x16_t) __b);
1219 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1220 vand_s8 (int8x8_t __a, int8x8_t __b)
1222 return __a & __b;
1225 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1226 vand_s16 (int16x4_t __a, int16x4_t __b)
1228 return __a & __b;
1231 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1232 vand_s32 (int32x2_t __a, int32x2_t __b)
1234 return __a & __b;
1237 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1238 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1240 return __a & __b;
1243 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1244 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1246 return __a & __b;
1249 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1250 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1252 return __a & __b;
1255 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1256 vand_s64 (int64x1_t __a, int64x1_t __b)
1258 return __a & __b;
1261 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1262 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1264 return __a & __b;
1267 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1268 vandq_s8 (int8x16_t __a, int8x16_t __b)
1270 return __a & __b;
1273 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1274 vandq_s16 (int16x8_t __a, int16x8_t __b)
1276 return __a & __b;
1279 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1280 vandq_s32 (int32x4_t __a, int32x4_t __b)
1282 return __a & __b;
1285 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1286 vandq_s64 (int64x2_t __a, int64x2_t __b)
1288 return __a & __b;
1291 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1292 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1294 return __a & __b;
1297 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1298 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1300 return __a & __b;
1303 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1304 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1306 return __a & __b;
1309 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1310 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1312 return __a & __b;
1315 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1316 vorr_s8 (int8x8_t __a, int8x8_t __b)
1318 return __a | __b;
1321 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1322 vorr_s16 (int16x4_t __a, int16x4_t __b)
1324 return __a | __b;
1327 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1328 vorr_s32 (int32x2_t __a, int32x2_t __b)
1330 return __a | __b;
1333 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1334 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1336 return __a | __b;
1339 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1340 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1342 return __a | __b;
1345 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1346 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1348 return __a | __b;
1351 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1352 vorr_s64 (int64x1_t __a, int64x1_t __b)
1354 return __a | __b;
1357 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1358 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1360 return __a | __b;
1363 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1364 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1366 return __a | __b;
1369 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1370 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1372 return __a | __b;
1375 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1376 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1378 return __a | __b;
1381 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1382 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1384 return __a | __b;
1387 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1388 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1390 return __a | __b;
1393 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1394 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1396 return __a | __b;
1399 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1400 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1402 return __a | __b;
1405 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1406 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1408 return __a | __b;
1411 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1412 veor_s8 (int8x8_t __a, int8x8_t __b)
1414 return __a ^ __b;
1417 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1418 veor_s16 (int16x4_t __a, int16x4_t __b)
1420 return __a ^ __b;
1423 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1424 veor_s32 (int32x2_t __a, int32x2_t __b)
1426 return __a ^ __b;
1429 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1430 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1432 return __a ^ __b;
1435 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1436 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1438 return __a ^ __b;
1441 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1442 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1444 return __a ^ __b;
1447 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1448 veor_s64 (int64x1_t __a, int64x1_t __b)
1450 return __a ^ __b;
1453 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1454 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1456 return __a ^ __b;
1459 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1460 veorq_s8 (int8x16_t __a, int8x16_t __b)
1462 return __a ^ __b;
1465 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1466 veorq_s16 (int16x8_t __a, int16x8_t __b)
1468 return __a ^ __b;
1471 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1472 veorq_s32 (int32x4_t __a, int32x4_t __b)
1474 return __a ^ __b;
1477 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1478 veorq_s64 (int64x2_t __a, int64x2_t __b)
1480 return __a ^ __b;
1483 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1484 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1486 return __a ^ __b;
1489 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1490 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1492 return __a ^ __b;
1495 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1496 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1498 return __a ^ __b;
1501 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1502 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1504 return __a ^ __b;
1507 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1508 vbic_s8 (int8x8_t __a, int8x8_t __b)
1510 return __a & ~__b;
1513 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1514 vbic_s16 (int16x4_t __a, int16x4_t __b)
1516 return __a & ~__b;
1519 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1520 vbic_s32 (int32x2_t __a, int32x2_t __b)
1522 return __a & ~__b;
1525 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1526 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1528 return __a & ~__b;
1531 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1532 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1534 return __a & ~__b;
1537 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1538 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1540 return __a & ~__b;
1543 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1544 vbic_s64 (int64x1_t __a, int64x1_t __b)
1546 return __a & ~__b;
1549 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1550 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1552 return __a & ~__b;
1555 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1556 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1558 return __a & ~__b;
1561 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1562 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1564 return __a & ~__b;
1567 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1568 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1570 return __a & ~__b;
1573 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1574 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1576 return __a & ~__b;
1579 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1580 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1582 return __a & ~__b;
1585 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1586 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1588 return __a & ~__b;
1591 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1592 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1594 return __a & ~__b;
1597 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1598 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1600 return __a & ~__b;
1603 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1604 vorn_s8 (int8x8_t __a, int8x8_t __b)
1606 return __a | ~__b;
1609 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1610 vorn_s16 (int16x4_t __a, int16x4_t __b)
1612 return __a | ~__b;
1615 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1616 vorn_s32 (int32x2_t __a, int32x2_t __b)
1618 return __a | ~__b;
1621 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1622 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1624 return __a | ~__b;
1627 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1628 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1630 return __a | ~__b;
1633 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1634 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1636 return __a | ~__b;
1639 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1640 vorn_s64 (int64x1_t __a, int64x1_t __b)
1642 return __a | ~__b;
1645 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1646 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1648 return __a | ~__b;
1651 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1652 vornq_s8 (int8x16_t __a, int8x16_t __b)
1654 return __a | ~__b;
1657 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1658 vornq_s16 (int16x8_t __a, int16x8_t __b)
1660 return __a | ~__b;
1663 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1664 vornq_s32 (int32x4_t __a, int32x4_t __b)
1666 return __a | ~__b;
1669 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1670 vornq_s64 (int64x2_t __a, int64x2_t __b)
1672 return __a | ~__b;
1675 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1676 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1678 return __a | ~__b;
1681 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1682 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1684 return __a | ~__b;
1687 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1688 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1690 return __a | ~__b;
1693 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1694 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1696 return __a | ~__b;
1699 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1700 vsub_s8 (int8x8_t __a, int8x8_t __b)
1702 return __a - __b;
1705 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1706 vsub_s16 (int16x4_t __a, int16x4_t __b)
1708 return __a - __b;
1711 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1712 vsub_s32 (int32x2_t __a, int32x2_t __b)
1714 return __a - __b;
1717 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1718 vsub_f32 (float32x2_t __a, float32x2_t __b)
1720 return __a - __b;
1723 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1724 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1726 return __a - __b;
1729 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1730 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1732 return __a - __b;
1735 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1736 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1738 return __a - __b;
1741 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1742 vsub_s64 (int64x1_t __a, int64x1_t __b)
1744 return __a - __b;
1747 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1748 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1750 return __a - __b;
1753 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1754 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1756 return __a - __b;
1759 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1760 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1762 return __a - __b;
1765 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1766 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1768 return __a - __b;
1771 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1772 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1774 return __a - __b;
1777 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1778 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1780 return __a - __b;
1783 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1784 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1786 return __a - __b;
1789 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1790 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1792 return __a - __b;
1795 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1796 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1798 return __a - __b;
1801 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1802 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1804 return __a - __b;
1807 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1808 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1810 return __a - __b;
1813 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1814 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1816 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1819 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1820 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1822 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1825 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1826 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1828 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1831 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1832 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1834 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1835 (int8x8_t) __b);
1838 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1839 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1841 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1842 (int16x4_t) __b);
1845 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1846 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
1848 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
1849 (int32x2_t) __b);
1852 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1853 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
1855 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
1858 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1859 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
1861 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
1864 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1865 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
1867 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
1870 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1871 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
1873 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
1874 (int8x16_t) __b);
1877 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1878 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
1880 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
1881 (int16x8_t) __b);
1884 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1885 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
1887 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
1888 (int32x4_t) __b);
1891 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1892 vsubw_s8 (int16x8_t __a, int8x8_t __b)
1894 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
1897 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1898 vsubw_s16 (int32x4_t __a, int16x4_t __b)
1900 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
1903 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1904 vsubw_s32 (int64x2_t __a, int32x2_t __b)
1906 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
1909 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1910 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
1912 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
1913 (int8x8_t) __b);
1916 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1917 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
1919 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
1920 (int16x4_t) __b);
1923 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1924 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
1926 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
1927 (int32x2_t) __b);
1930 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1931 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
1933 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
1936 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1937 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
1939 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
1942 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1943 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
1945 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
1948 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1949 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
1951 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
1952 (int8x16_t) __b);
1955 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1956 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
1958 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
1959 (int16x8_t) __b);
1962 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1963 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
1965 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
1966 (int32x4_t) __b);
1969 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1970 vqadd_s8 (int8x8_t __a, int8x8_t __b)
1972 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
1975 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1976 vqadd_s16 (int16x4_t __a, int16x4_t __b)
1978 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
1981 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1982 vqadd_s32 (int32x2_t __a, int32x2_t __b)
1984 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
1987 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1988 vqadd_s64 (int64x1_t __a, int64x1_t __b)
1990 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
1993 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1994 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
1996 return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a,
1997 (int8x8_t) __b);
2000 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2001 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2003 return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a,
2004 (int16x4_t) __b);
2007 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2008 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2010 return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a,
2011 (int32x2_t) __b);
2014 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2015 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2017 return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a,
2018 (int64x1_t) __b);
2021 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2022 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2024 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2027 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2028 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2030 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2033 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2034 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2036 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2039 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2040 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2042 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2045 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2046 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2048 return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a,
2049 (int8x16_t) __b);
2052 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2053 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2055 return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a,
2056 (int16x8_t) __b);
2059 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2060 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2062 return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a,
2063 (int32x4_t) __b);
2066 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2067 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2069 return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a,
2070 (int64x2_t) __b);
2073 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2074 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2076 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2079 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2080 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2082 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2085 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2086 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2088 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2091 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2092 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2094 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
2097 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2098 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2100 return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a,
2101 (int8x8_t) __b);
2104 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2105 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2107 return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a,
2108 (int16x4_t) __b);
2111 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2112 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2114 return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a,
2115 (int32x2_t) __b);
2118 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2119 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2121 return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a,
2122 (int64x1_t) __b);
2125 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2126 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2128 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2131 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2132 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2134 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2137 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2138 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2140 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2143 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2144 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2146 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2149 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2150 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2152 return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a,
2153 (int8x16_t) __b);
2156 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2157 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2159 return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a,
2160 (int16x8_t) __b);
2163 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2164 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2166 return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a,
2167 (int32x4_t) __b);
2170 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2171 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2173 return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a,
2174 (int64x2_t) __b);
2177 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2178 vqneg_s8 (int8x8_t __a)
2180 return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
2183 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2184 vqneg_s16 (int16x4_t __a)
2186 return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
2189 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2190 vqneg_s32 (int32x2_t __a)
2192 return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
2195 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2196 vqnegq_s8 (int8x16_t __a)
2198 return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
2201 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2202 vqnegq_s16 (int16x8_t __a)
2204 return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
2207 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2208 vqnegq_s32 (int32x4_t __a)
2210 return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
2213 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2214 vqabs_s8 (int8x8_t __a)
2216 return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
2219 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2220 vqabs_s16 (int16x4_t __a)
2222 return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
2225 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2226 vqabs_s32 (int32x2_t __a)
2228 return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
2231 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2232 vqabsq_s8 (int8x16_t __a)
2234 return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
2237 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2238 vqabsq_s16 (int16x8_t __a)
2240 return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
2243 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2244 vqabsq_s32 (int32x4_t __a)
2246 return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
2249 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2250 vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
2252 return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
2255 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2256 vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
2258 return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
2261 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2262 vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2264 return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
2267 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2268 vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2270 return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
2273 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2274 vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
2276 return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
2279 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2280 vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
2282 return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
2285 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2286 vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
2288 return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
2291 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2292 vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
2294 return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
2297 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2298 vcreate_s8 (uint64_t __a)
2300 return (int8x8_t) __a;
2303 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2304 vcreate_s16 (uint64_t __a)
2306 return (int16x4_t) __a;
2309 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2310 vcreate_s32 (uint64_t __a)
2312 return (int32x2_t) __a;
2315 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2316 vcreate_s64 (uint64_t __a)
2318 return (int64x1_t) __a;
2321 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2322 vcreate_f32 (uint64_t __a)
2324 return (float32x2_t) __a;
2327 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2328 vcreate_u8 (uint64_t __a)
2330 return (uint8x8_t) __a;
2333 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2334 vcreate_u16 (uint64_t __a)
2336 return (uint16x4_t) __a;
2339 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2340 vcreate_u32 (uint64_t __a)
2342 return (uint32x2_t) __a;
2345 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2346 vcreate_u64 (uint64_t __a)
2348 return (uint64x1_t) __a;
2351 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
2352 vcreate_f64 (uint64_t __a)
2354 return (float64x1_t) __builtin_aarch64_createdf (__a);
2357 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2358 vcreate_p8 (uint64_t __a)
2360 return (poly8x8_t) __a;
2363 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2364 vcreate_p16 (uint64_t __a)
2366 return (poly16x4_t) __a;
2369 /* vget_lane */
2371 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2372 vget_lane_f32 (float32x2_t __a, const int __b)
2374 return __aarch64_vget_lane_f32 (__a, __b);
2377 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2378 vget_lane_f64 (float64x1_t __a, const int __b)
2380 return __aarch64_vget_lane_f64 (__a, __b);
2383 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2384 vget_lane_p8 (poly8x8_t __a, const int __b)
2386 return __aarch64_vget_lane_p8 (__a, __b);
2389 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2390 vget_lane_p16 (poly16x4_t __a, const int __b)
2392 return __aarch64_vget_lane_p16 (__a, __b);
2395 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2396 vget_lane_s8 (int8x8_t __a, const int __b)
2398 return __aarch64_vget_lane_s8 (__a, __b);
2401 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2402 vget_lane_s16 (int16x4_t __a, const int __b)
2404 return __aarch64_vget_lane_s16 (__a, __b);
2407 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2408 vget_lane_s32 (int32x2_t __a, const int __b)
2410 return __aarch64_vget_lane_s32 (__a, __b);
2413 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2414 vget_lane_s64 (int64x1_t __a, const int __b)
2416 return __aarch64_vget_lane_s64 (__a, __b);
2419 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2420 vget_lane_u8 (uint8x8_t __a, const int __b)
2422 return __aarch64_vget_lane_u8 (__a, __b);
2425 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2426 vget_lane_u16 (uint16x4_t __a, const int __b)
2428 return __aarch64_vget_lane_u16 (__a, __b);
2431 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2432 vget_lane_u32 (uint32x2_t __a, const int __b)
2434 return __aarch64_vget_lane_u32 (__a, __b);
2437 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2438 vget_lane_u64 (uint64x1_t __a, const int __b)
2440 return __aarch64_vget_lane_u64 (__a, __b);
2443 /* vgetq_lane */
2445 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
2446 vgetq_lane_f32 (float32x4_t __a, const int __b)
2448 return __aarch64_vgetq_lane_f32 (__a, __b);
2451 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
2452 vgetq_lane_f64 (float64x2_t __a, const int __b)
2454 return __aarch64_vgetq_lane_f64 (__a, __b);
2457 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
2458 vgetq_lane_p8 (poly8x16_t __a, const int __b)
2460 return __aarch64_vgetq_lane_p8 (__a, __b);
2463 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
2464 vgetq_lane_p16 (poly16x8_t __a, const int __b)
2466 return __aarch64_vgetq_lane_p16 (__a, __b);
2469 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
2470 vgetq_lane_s8 (int8x16_t __a, const int __b)
2472 return __aarch64_vgetq_lane_s8 (__a, __b);
2475 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
2476 vgetq_lane_s16 (int16x8_t __a, const int __b)
2478 return __aarch64_vgetq_lane_s16 (__a, __b);
2481 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
2482 vgetq_lane_s32 (int32x4_t __a, const int __b)
2484 return __aarch64_vgetq_lane_s32 (__a, __b);
2487 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
2488 vgetq_lane_s64 (int64x2_t __a, const int __b)
2490 return __aarch64_vgetq_lane_s64 (__a, __b);
2493 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
2494 vgetq_lane_u8 (uint8x16_t __a, const int __b)
2496 return __aarch64_vgetq_lane_u8 (__a, __b);
2499 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
2500 vgetq_lane_u16 (uint16x8_t __a, const int __b)
2502 return __aarch64_vgetq_lane_u16 (__a, __b);
2505 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
2506 vgetq_lane_u32 (uint32x4_t __a, const int __b)
2508 return __aarch64_vgetq_lane_u32 (__a, __b);
2511 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
2512 vgetq_lane_u64 (uint64x2_t __a, const int __b)
2514 return __aarch64_vgetq_lane_u64 (__a, __b);
2517 /* vreinterpret */
2519 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2520 vreinterpret_p8_s8 (int8x8_t __a)
2522 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
2525 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2526 vreinterpret_p8_s16 (int16x4_t __a)
2528 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
2531 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2532 vreinterpret_p8_s32 (int32x2_t __a)
2534 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
2537 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2538 vreinterpret_p8_s64 (int64x1_t __a)
2540 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
2543 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2544 vreinterpret_p8_f32 (float32x2_t __a)
2546 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
2549 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2550 vreinterpret_p8_u8 (uint8x8_t __a)
2552 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
2555 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2556 vreinterpret_p8_u16 (uint16x4_t __a)
2558 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2561 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2562 vreinterpret_p8_u32 (uint32x2_t __a)
2564 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
2567 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2568 vreinterpret_p8_u64 (uint64x1_t __a)
2570 return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
2573 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
2574 vreinterpret_p8_p16 (poly16x4_t __a)
2576 return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
2579 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2580 vreinterpretq_p8_s8 (int8x16_t __a)
2582 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
2585 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2586 vreinterpretq_p8_s16 (int16x8_t __a)
2588 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
2591 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2592 vreinterpretq_p8_s32 (int32x4_t __a)
2594 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
2597 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2598 vreinterpretq_p8_s64 (int64x2_t __a)
2600 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
2603 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2604 vreinterpretq_p8_f32 (float32x4_t __a)
2606 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
2609 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2610 vreinterpretq_p8_u8 (uint8x16_t __a)
2612 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
2613 __a);
2616 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2617 vreinterpretq_p8_u16 (uint16x8_t __a)
2619 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2620 __a);
2623 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2624 vreinterpretq_p8_u32 (uint32x4_t __a)
2626 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
2627 __a);
2630 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2631 vreinterpretq_p8_u64 (uint64x2_t __a)
2633 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
2634 __a);
2637 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
2638 vreinterpretq_p8_p16 (poly16x8_t __a)
2640 return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
2641 __a);
2644 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2645 vreinterpret_p16_s8 (int8x8_t __a)
2647 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
2650 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2651 vreinterpret_p16_s16 (int16x4_t __a)
2653 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
2656 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2657 vreinterpret_p16_s32 (int32x2_t __a)
2659 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
2662 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2663 vreinterpret_p16_s64 (int64x1_t __a)
2665 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
2668 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2669 vreinterpret_p16_f32 (float32x2_t __a)
2671 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
2674 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2675 vreinterpret_p16_u8 (uint8x8_t __a)
2677 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2680 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2681 vreinterpret_p16_u16 (uint16x4_t __a)
2683 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
2686 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2687 vreinterpret_p16_u32 (uint32x2_t __a)
2689 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
2692 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2693 vreinterpret_p16_u64 (uint64x1_t __a)
2695 return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
2698 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
2699 vreinterpret_p16_p8 (poly8x8_t __a)
2701 return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
2704 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2705 vreinterpretq_p16_s8 (int8x16_t __a)
2707 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
2710 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2711 vreinterpretq_p16_s16 (int16x8_t __a)
2713 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
2716 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2717 vreinterpretq_p16_s32 (int32x4_t __a)
2719 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
2722 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2723 vreinterpretq_p16_s64 (int64x2_t __a)
2725 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
2728 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2729 vreinterpretq_p16_f32 (float32x4_t __a)
2731 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
2734 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2735 vreinterpretq_p16_u8 (uint8x16_t __a)
2737 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2738 __a);
2741 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2742 vreinterpretq_p16_u16 (uint16x8_t __a)
2744 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
2747 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2748 vreinterpretq_p16_u32 (uint32x4_t __a)
2750 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
2753 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2754 vreinterpretq_p16_u64 (uint64x2_t __a)
2756 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
2759 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
2760 vreinterpretq_p16_p8 (poly8x16_t __a)
2762 return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
2763 __a);
2766 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2767 vreinterpret_f32_s8 (int8x8_t __a)
2769 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a);
2772 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2773 vreinterpret_f32_s16 (int16x4_t __a)
2775 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a);
2778 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2779 vreinterpret_f32_s32 (int32x2_t __a)
2781 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a);
2784 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2785 vreinterpret_f32_s64 (int64x1_t __a)
2787 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a);
2790 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2791 vreinterpret_f32_u8 (uint8x8_t __a)
2793 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2796 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2797 vreinterpret_f32_u16 (uint16x4_t __a)
2799 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2800 __a);
2803 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2804 vreinterpret_f32_u32 (uint32x2_t __a)
2806 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t)
2807 __a);
2810 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2811 vreinterpret_f32_u64 (uint64x1_t __a)
2813 return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a);
2816 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2817 vreinterpret_f32_p8 (poly8x8_t __a)
2819 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a);
2822 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
2823 vreinterpret_f32_p16 (poly16x4_t __a)
2825 return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t)
2826 __a);
2829 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2830 vreinterpretq_f32_s8 (int8x16_t __a)
2832 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a);
2835 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2836 vreinterpretq_f32_s16 (int16x8_t __a)
2838 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a);
2841 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2842 vreinterpretq_f32_s32 (int32x4_t __a)
2844 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a);
2847 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2848 vreinterpretq_f32_s64 (int64x2_t __a)
2850 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a);
2853 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2854 vreinterpretq_f32_u8 (uint8x16_t __a)
2856 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2857 __a);
2860 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2861 vreinterpretq_f32_u16 (uint16x8_t __a)
2863 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2864 __a);
2867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2868 vreinterpretq_f32_u32 (uint32x4_t __a)
2870 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t)
2871 __a);
2874 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2875 vreinterpretq_f32_u64 (uint64x2_t __a)
2877 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t)
2878 __a);
2881 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2882 vreinterpretq_f32_p8 (poly8x16_t __a)
2884 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t)
2885 __a);
2888 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
2889 vreinterpretq_f32_p16 (poly16x8_t __a)
2891 return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t)
2892 __a);
2895 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2896 vreinterpret_s64_s8 (int8x8_t __a)
2898 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
2901 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2902 vreinterpret_s64_s16 (int16x4_t __a)
2904 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
2907 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2908 vreinterpret_s64_s32 (int32x2_t __a)
2910 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
2913 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2914 vreinterpret_s64_f32 (float32x2_t __a)
2916 return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
2919 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2920 vreinterpret_s64_u8 (uint8x8_t __a)
2922 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2925 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2926 vreinterpret_s64_u16 (uint16x4_t __a)
2928 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2931 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2932 vreinterpret_s64_u32 (uint32x2_t __a)
2934 return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
2937 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2938 vreinterpret_s64_u64 (uint64x1_t __a)
2940 return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a);
2943 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2944 vreinterpret_s64_p8 (poly8x8_t __a)
2946 return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
2949 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2950 vreinterpret_s64_p16 (poly16x4_t __a)
2952 return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
2955 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2956 vreinterpretq_s64_s8 (int8x16_t __a)
2958 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
2961 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2962 vreinterpretq_s64_s16 (int16x8_t __a)
2964 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
2967 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2968 vreinterpretq_s64_s32 (int32x4_t __a)
2970 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
2973 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2974 vreinterpretq_s64_f32 (float32x4_t __a)
2976 return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
2979 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2980 vreinterpretq_s64_u8 (uint8x16_t __a)
2982 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
2985 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2986 vreinterpretq_s64_u16 (uint16x8_t __a)
2988 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
2991 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2992 vreinterpretq_s64_u32 (uint32x4_t __a)
2994 return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
2997 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2998 vreinterpretq_s64_u64 (uint64x2_t __a)
3000 return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a);
3003 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3004 vreinterpretq_s64_p8 (poly8x16_t __a)
3006 return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a);
3009 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3010 vreinterpretq_s64_p16 (poly16x8_t __a)
3012 return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3015 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3016 vreinterpret_u64_s8 (int8x8_t __a)
3018 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a);
3021 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3022 vreinterpret_u64_s16 (int16x4_t __a)
3024 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a);
3027 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3028 vreinterpret_u64_s32 (int32x2_t __a)
3030 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a);
3033 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3034 vreinterpret_u64_s64 (int64x1_t __a)
3036 return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a);
3039 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3040 vreinterpret_u64_f32 (float32x2_t __a)
3042 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a);
3045 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3046 vreinterpret_u64_u8 (uint8x8_t __a)
3048 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3051 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3052 vreinterpret_u64_u16 (uint16x4_t __a)
3054 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3057 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3058 vreinterpret_u64_u32 (uint32x2_t __a)
3060 return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a);
3063 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3064 vreinterpret_u64_p8 (poly8x8_t __a)
3066 return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a);
3069 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
3070 vreinterpret_u64_p16 (poly16x4_t __a)
3072 return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a);
3075 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3076 vreinterpretq_u64_s8 (int8x16_t __a)
3078 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a);
3081 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3082 vreinterpretq_u64_s16 (int16x8_t __a)
3084 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a);
3087 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3088 vreinterpretq_u64_s32 (int32x4_t __a)
3090 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a);
3093 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3094 vreinterpretq_u64_s64 (int64x2_t __a)
3096 return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a);
3099 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3100 vreinterpretq_u64_f32 (float32x4_t __a)
3102 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a);
3105 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3106 vreinterpretq_u64_u8 (uint8x16_t __a)
3108 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3109 __a);
3112 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3113 vreinterpretq_u64_u16 (uint16x8_t __a)
3115 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3118 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3119 vreinterpretq_u64_u32 (uint32x4_t __a)
3121 return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a);
3124 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3125 vreinterpretq_u64_p8 (poly8x16_t __a)
3127 return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t)
3128 __a);
3131 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3132 vreinterpretq_u64_p16 (poly16x8_t __a)
3134 return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a);
3137 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3138 vreinterpret_s8_s16 (int16x4_t __a)
3140 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3143 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3144 vreinterpret_s8_s32 (int32x2_t __a)
3146 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3149 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3150 vreinterpret_s8_s64 (int64x1_t __a)
3152 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3155 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3156 vreinterpret_s8_f32 (float32x2_t __a)
3158 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3161 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3162 vreinterpret_s8_u8 (uint8x8_t __a)
3164 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3167 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3168 vreinterpret_s8_u16 (uint16x4_t __a)
3170 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3173 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3174 vreinterpret_s8_u32 (uint32x2_t __a)
3176 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3179 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3180 vreinterpret_s8_u64 (uint64x1_t __a)
3182 return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3185 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3186 vreinterpret_s8_p8 (poly8x8_t __a)
3188 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3191 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3192 vreinterpret_s8_p16 (poly16x4_t __a)
3194 return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3197 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3198 vreinterpretq_s8_s16 (int16x8_t __a)
3200 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3203 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3204 vreinterpretq_s8_s32 (int32x4_t __a)
3206 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3209 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3210 vreinterpretq_s8_s64 (int64x2_t __a)
3212 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3215 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3216 vreinterpretq_s8_f32 (float32x4_t __a)
3218 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3221 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3222 vreinterpretq_s8_u8 (uint8x16_t __a)
3224 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3225 __a);
3228 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3229 vreinterpretq_s8_u16 (uint16x8_t __a)
3231 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3234 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3235 vreinterpretq_s8_u32 (uint32x4_t __a)
3237 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a);
3240 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3241 vreinterpretq_s8_u64 (uint64x2_t __a)
3243 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a);
3246 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3247 vreinterpretq_s8_p8 (poly8x16_t __a)
3249 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3250 __a);
3253 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3254 vreinterpretq_s8_p16 (poly16x8_t __a)
3256 return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a);
3259 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3260 vreinterpret_s16_s8 (int8x8_t __a)
3262 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3265 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3266 vreinterpret_s16_s32 (int32x2_t __a)
3268 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3271 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3272 vreinterpret_s16_s64 (int64x1_t __a)
3274 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3277 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3278 vreinterpret_s16_f32 (float32x2_t __a)
3280 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3283 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3284 vreinterpret_s16_u8 (uint8x8_t __a)
3286 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3289 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3290 vreinterpret_s16_u16 (uint16x4_t __a)
3292 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
3295 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3296 vreinterpret_s16_u32 (uint32x2_t __a)
3298 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3301 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3302 vreinterpret_s16_u64 (uint64x1_t __a)
3304 return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3307 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3308 vreinterpret_s16_p8 (poly8x8_t __a)
3310 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3313 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3314 vreinterpret_s16_p16 (poly16x4_t __a)
3316 return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
/* vreinterpretq_s16_<src>: reinterpret the bits of a 128-bit NEON
   vector as int16x8_t.  Type change only; unsigned/poly inputs are
   cast to the equivalent signed vector type for the builtin.  */
3319 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3320 vreinterpretq_s16_s8 (int8x16_t __a)
3322 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3325 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3326 vreinterpretq_s16_s32 (int32x4_t __a)
3328 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3331 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3332 vreinterpretq_s16_s64 (int64x2_t __a)
3334 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3337 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3338 vreinterpretq_s16_f32 (float32x4_t __a)
3340 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3343 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3344 vreinterpretq_s16_u8 (uint8x16_t __a)
3346 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3349 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3350 vreinterpretq_s16_u16 (uint16x8_t __a)
3352 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
3355 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3356 vreinterpretq_s16_u32 (uint32x4_t __a)
3358 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3361 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3362 vreinterpretq_s16_u64 (uint64x2_t __a)
3364 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3367 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3368 vreinterpretq_s16_p8 (poly8x16_t __a)
3370 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a);
3373 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3374 vreinterpretq_s16_p16 (poly16x8_t __a)
3376 return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
/* vreinterpret_s32_<src>: reinterpret the bits of a 64-bit NEON vector
   as int32x2_t.  Type change only; unsigned inputs are cast to the
   equivalent signed vector type for the builtin.  */
3379 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3380 vreinterpret_s32_s8 (int8x8_t __a)
3382 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3385 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3386 vreinterpret_s32_s16 (int16x4_t __a)
3388 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3391 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3392 vreinterpret_s32_s64 (int64x1_t __a)
3394 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3397 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3398 vreinterpret_s32_f32 (float32x2_t __a)
3400 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3403 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3404 vreinterpret_s32_u8 (uint8x8_t __a)
3406 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3409 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3410 vreinterpret_s32_u16 (uint16x4_t __a)
3412 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3415 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3416 vreinterpret_s32_u32 (uint32x2_t __a)
3418 return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a);
3421 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3422 vreinterpret_s32_u64 (uint64x1_t __a)
3424 return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3427 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3428 vreinterpret_s32_p8 (poly8x8_t __a)
3430 return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3433 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3434 vreinterpret_s32_p16 (poly16x4_t __a)
3436 return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
/* vreinterpretq_s32_<src>: reinterpret the bits of a 128-bit NEON
   vector as int32x4_t.  Type change only; unsigned/poly inputs are
   cast to the equivalent signed vector type for the builtin.  */
3439 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3440 vreinterpretq_s32_s8 (int8x16_t __a)
3442 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3445 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3446 vreinterpretq_s32_s16 (int16x8_t __a)
3448 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3451 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3452 vreinterpretq_s32_s64 (int64x2_t __a)
3454 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3457 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3458 vreinterpretq_s32_f32 (float32x4_t __a)
3460 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3463 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3464 vreinterpretq_s32_u8 (uint8x16_t __a)
3466 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3469 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3470 vreinterpretq_s32_u16 (uint16x8_t __a)
3472 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3475 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3476 vreinterpretq_s32_u32 (uint32x4_t __a)
3478 return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a);
3481 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3482 vreinterpretq_s32_u64 (uint64x2_t __a)
3484 return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3487 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3488 vreinterpretq_s32_p8 (poly8x16_t __a)
3490 return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a);
3493 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3494 vreinterpretq_s32_p16 (poly16x8_t __a)
3496 return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
/* vreinterpret_u8_<src>: reinterpret the bits of a 64-bit NEON vector
   as uint8x8_t.  Type change only; the signed-typed builtin result is
   cast back to the unsigned vector type on return.  */
3499 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3500 vreinterpret_u8_s8 (int8x8_t __a)
3502 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a);
3505 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3506 vreinterpret_u8_s16 (int16x4_t __a)
3508 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a);
3511 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3512 vreinterpret_u8_s32 (int32x2_t __a)
3514 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a);
3517 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3518 vreinterpret_u8_s64 (int64x1_t __a)
3520 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a);
3523 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3524 vreinterpret_u8_f32 (float32x2_t __a)
3526 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a);
3529 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3530 vreinterpret_u8_u16 (uint16x4_t __a)
3532 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
3535 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3536 vreinterpret_u8_u32 (uint32x2_t __a)
3538 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a);
3541 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3542 vreinterpret_u8_u64 (uint64x1_t __a)
3544 return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a);
3547 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3548 vreinterpret_u8_p8 (poly8x8_t __a)
3550 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a);
3553 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3554 vreinterpret_u8_p16 (poly16x4_t __a)
3556 return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a);
/* vreinterpretq_u8_<src>: reinterpret the bits of a 128-bit NEON
   vector as uint8x16_t.  Type change only; operands are cast through
   the signed vector types the builtins are declared on.  */
3559 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3560 vreinterpretq_u8_s8 (int8x16_t __a)
3562 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a);
3565 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3566 vreinterpretq_u8_s16 (int16x8_t __a)
3568 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a);
3571 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3572 vreinterpretq_u8_s32 (int32x4_t __a)
3574 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a);
3577 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3578 vreinterpretq_u8_s64 (int64x2_t __a)
3580 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a);
3583 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3584 vreinterpretq_u8_f32 (float32x4_t __a)
3586 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a);
3589 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3590 vreinterpretq_u8_u16 (uint16x8_t __a)
3592 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3593 __a);
3596 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3597 vreinterpretq_u8_u32 (uint32x4_t __a)
3599 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t)
3600 __a);
3603 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3604 vreinterpretq_u8_u64 (uint64x2_t __a)
3606 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t)
3607 __a);
3610 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3611 vreinterpretq_u8_p8 (poly8x16_t __a)
3613 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t)
3614 __a);
3617 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3618 vreinterpretq_u8_p16 (poly16x8_t __a)
3620 return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t)
3621 __a);
/* vreinterpret_u16_<src>: reinterpret the bits of a 64-bit NEON vector
   as uint16x4_t.  Type change only.  */
3624 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3625 vreinterpret_u16_s8 (int8x8_t __a)
3627 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a);
3630 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3631 vreinterpret_u16_s16 (int16x4_t __a)
3633 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a);
3636 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3637 vreinterpret_u16_s32 (int32x2_t __a)
3639 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a);
3642 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3643 vreinterpret_u16_s64 (int64x1_t __a)
3645 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a);
3648 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3649 vreinterpret_u16_f32 (float32x2_t __a)
3651 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a);
3654 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3655 vreinterpret_u16_u8 (uint8x8_t __a)
3657 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3660 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3661 vreinterpret_u16_u32 (uint32x2_t __a)
3663 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a);
3666 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3667 vreinterpret_u16_u64 (uint64x1_t __a)
3669 return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a);
3672 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3673 vreinterpret_u16_p8 (poly8x8_t __a)
3675 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a);
3678 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3679 vreinterpret_u16_p16 (poly16x4_t __a)
3681 return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a);
/* vreinterpretq_u16_<src>: reinterpret the bits of a 128-bit NEON
   vector as uint16x8_t.  Type change only.  */
3684 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3685 vreinterpretq_u16_s8 (int8x16_t __a)
3687 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a);
3690 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3691 vreinterpretq_u16_s16 (int16x8_t __a)
3693 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a);
3696 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3697 vreinterpretq_u16_s32 (int32x4_t __a)
3699 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a);
3702 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3703 vreinterpretq_u16_s64 (int64x2_t __a)
3705 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a);
3708 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3709 vreinterpretq_u16_f32 (float32x4_t __a)
3711 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a);
3714 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3715 vreinterpretq_u16_u8 (uint8x16_t __a)
3717 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3718 __a);
3721 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3722 vreinterpretq_u16_u32 (uint32x4_t __a)
3724 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a);
3727 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3728 vreinterpretq_u16_u64 (uint64x2_t __a)
3730 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a);
3733 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3734 vreinterpretq_u16_p8 (poly8x16_t __a)
3736 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t)
3737 __a);
3740 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3741 vreinterpretq_u16_p16 (poly16x8_t __a)
3743 return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a);
/* vreinterpret_u32_<src>: reinterpret the bits of a 64-bit NEON vector
   as uint32x2_t.  Type change only.  */
3746 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3747 vreinterpret_u32_s8 (int8x8_t __a)
3749 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a);
3752 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3753 vreinterpret_u32_s16 (int16x4_t __a)
3755 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a);
3758 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3759 vreinterpret_u32_s32 (int32x2_t __a)
3761 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a);
3764 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3765 vreinterpret_u32_s64 (int64x1_t __a)
3767 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a);
3770 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3771 vreinterpret_u32_f32 (float32x2_t __a)
3773 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a);
3776 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3777 vreinterpret_u32_u8 (uint8x8_t __a)
3779 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3782 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3783 vreinterpret_u32_u16 (uint16x4_t __a)
3785 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
3788 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3789 vreinterpret_u32_u64 (uint64x1_t __a)
3791 return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a);
3794 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3795 vreinterpret_u32_p8 (poly8x8_t __a)
3797 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a);
3800 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
3801 vreinterpret_u32_p16 (poly16x4_t __a)
3803 return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a);
/* vreinterpretq_u32_<src>: reinterpret the bits of a 128-bit NEON
   vector as uint32x4_t.  Type change only.  */
3806 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3807 vreinterpretq_u32_s8 (int8x16_t __a)
3809 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a);
3812 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3813 vreinterpretq_u32_s16 (int16x8_t __a)
3815 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a);
3818 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3819 vreinterpretq_u32_s32 (int32x4_t __a)
3821 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a);
3824 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3825 vreinterpretq_u32_s64 (int64x2_t __a)
3827 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a);
3830 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3831 vreinterpretq_u32_f32 (float32x4_t __a)
3833 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a);
3836 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3837 vreinterpretq_u32_u8 (uint8x16_t __a)
3839 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3840 __a);
3843 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3844 vreinterpretq_u32_u16 (uint16x8_t __a)
3846 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
3849 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3850 vreinterpretq_u32_u64 (uint64x2_t __a)
3852 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a);
3855 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3856 vreinterpretq_u32_p8 (poly8x16_t __a)
3858 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t)
3859 __a);
3862 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3863 vreinterpretq_u32_p16 (poly16x8_t __a)
3865 return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a);
/* vcombine_<type>: join two 64-bit vectors __a (low half) and __b
   (high half) into one 128-bit vector via the combine builtins.
   Unsigned/poly variants cast through the signed vector types the
   builtins are declared on; the float64 variant uses combinedf.  */
3868 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3869 vcombine_s8 (int8x8_t __a, int8x8_t __b)
3871 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
3874 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3875 vcombine_s16 (int16x4_t __a, int16x4_t __b)
3877 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
3880 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3881 vcombine_s32 (int32x2_t __a, int32x2_t __b)
3883 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
3886 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
3887 vcombine_s64 (int64x1_t __a, int64x1_t __b)
3889 return (int64x2_t) __builtin_aarch64_combinedi (__a, __b);
3892 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
3893 vcombine_f32 (float32x2_t __a, float32x2_t __b)
3895 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
3898 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3899 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
3901 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3902 (int8x8_t) __b);
3905 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
3906 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
3908 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3909 (int16x4_t) __b);
3912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
3913 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
3915 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
3916 (int32x2_t) __b);
3919 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
3920 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
3922 return (uint64x2_t) __builtin_aarch64_combinedi ((int64x1_t) __a,
3923 (int64x1_t) __b);
3926 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
3927 vcombine_f64 (float64x1_t __a, float64x1_t __b)
3929 return (float64x2_t) __builtin_aarch64_combinedf (__a, __b);
3932 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
3933 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
3935 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
3936 (int8x8_t) __b);
3939 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
3940 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
3942 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
3943 (int16x4_t) __b);
3946 /* Start of temporary inline asm implementations. */
/* vaba_<type>: absolute difference and accumulate, 64-bit vectors.
   Emits SABA (signed) / UABA (unsigned): per-lane a + |b - c| per the
   ARM ARM instruction descriptions.  Inline-asm constraints: "=w"
   allocates the result in a SIMD/FP register, "0"(a) ties the
   accumulator input to the same register as output %0 (SABA/UABA
   accumulate in place), "w" places b and c in SIMD/FP registers.  */
3948 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3949 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
3951 int8x8_t result;
3952 __asm__ ("saba %0.8b,%2.8b,%3.8b"
3953 : "=w"(result)
3954 : "0"(a), "w"(b), "w"(c)
3955 : /* No clobbers */);
3956 return result;
3959 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3960 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
3962 int16x4_t result;
3963 __asm__ ("saba %0.4h,%2.4h,%3.4h"
3964 : "=w"(result)
3965 : "0"(a), "w"(b), "w"(c)
3966 : /* No clobbers */);
3967 return result;
3970 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3971 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
3973 int32x2_t result;
3974 __asm__ ("saba %0.2s,%2.2s,%3.2s"
3975 : "=w"(result)
3976 : "0"(a), "w"(b), "w"(c)
3977 : /* No clobbers */);
3978 return result;
3981 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3982 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
3984 uint8x8_t result;
3985 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
3986 : "=w"(result)
3987 : "0"(a), "w"(b), "w"(c)
3988 : /* No clobbers */);
3989 return result;
3992 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3993 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
3995 uint16x4_t result;
3996 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
3997 : "=w"(result)
3998 : "0"(a), "w"(b), "w"(c)
3999 : /* No clobbers */);
4000 return result;
4003 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4004 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4006 uint32x2_t result;
4007 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4008 : "=w"(result)
4009 : "0"(a), "w"(b), "w"(c)
4010 : /* No clobbers */);
4011 return result;
/* vabal_high_<type>: widening absolute difference and accumulate on
   the UPPER halves of 128-bit sources (SABAL2/UABAL2).  "0"(a) ties
   the wide accumulator to output register %0; "w" = SIMD/FP regs.  */
4014 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4015 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4017 int16x8_t result;
4018 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4019 : "=w"(result)
4020 : "0"(a), "w"(b), "w"(c)
4021 : /* No clobbers */);
4022 return result;
4025 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4026 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4028 int32x4_t result;
4029 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4030 : "=w"(result)
4031 : "0"(a), "w"(b), "w"(c)
4032 : /* No clobbers */);
4033 return result;
4036 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4037 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4039 int64x2_t result;
4040 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4041 : "=w"(result)
4042 : "0"(a), "w"(b), "w"(c)
4043 : /* No clobbers */);
4044 return result;
4047 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4048 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4050 uint16x8_t result;
4051 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4052 : "=w"(result)
4053 : "0"(a), "w"(b), "w"(c)
4054 : /* No clobbers */);
4055 return result;
4058 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4059 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4061 uint32x4_t result;
4062 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4063 : "=w"(result)
4064 : "0"(a), "w"(b), "w"(c)
4065 : /* No clobbers */);
4066 return result;
4069 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4070 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4072 uint64x2_t result;
4073 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4074 : "=w"(result)
4075 : "0"(a), "w"(b), "w"(c)
4076 : /* No clobbers */);
4077 return result;
/* vabal_<type>: widening absolute difference and accumulate
   (SABAL/UABAL): 64-bit b and c are widened, |b - c| is added into the
   128-bit accumulator a.  "0"(a) ties the accumulator to output %0.  */
4080 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4081 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4083 int16x8_t result;
4084 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4085 : "=w"(result)
4086 : "0"(a), "w"(b), "w"(c)
4087 : /* No clobbers */);
4088 return result;
4091 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4092 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4094 int32x4_t result;
4095 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4096 : "=w"(result)
4097 : "0"(a), "w"(b), "w"(c)
4098 : /* No clobbers */);
4099 return result;
4102 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4103 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4105 int64x2_t result;
4106 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4107 : "=w"(result)
4108 : "0"(a), "w"(b), "w"(c)
4109 : /* No clobbers */);
4110 return result;
4113 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4114 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4116 uint16x8_t result;
4117 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4118 : "=w"(result)
4119 : "0"(a), "w"(b), "w"(c)
4120 : /* No clobbers */);
4121 return result;
4124 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4125 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4127 uint32x4_t result;
4128 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4129 : "=w"(result)
4130 : "0"(a), "w"(b), "w"(c)
4131 : /* No clobbers */);
4132 return result;
4135 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4136 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4138 uint64x2_t result;
4139 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4140 : "=w"(result)
4141 : "0"(a), "w"(b), "w"(c)
4142 : /* No clobbers */);
4143 return result;
/* vabaq_<type>: absolute difference and accumulate on full 128-bit
   vectors (SABA/UABA, q-register forms).  Same constraint scheme as
   the 64-bit vaba_* variants: "0"(a) ties accumulator to output %0.  */
4146 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4147 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4149 int8x16_t result;
4150 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4151 : "=w"(result)
4152 : "0"(a), "w"(b), "w"(c)
4153 : /* No clobbers */);
4154 return result;
4157 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4158 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4160 int16x8_t result;
4161 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4162 : "=w"(result)
4163 : "0"(a), "w"(b), "w"(c)
4164 : /* No clobbers */);
4165 return result;
4168 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4169 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4171 int32x4_t result;
4172 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4173 : "=w"(result)
4174 : "0"(a), "w"(b), "w"(c)
4175 : /* No clobbers */);
4176 return result;
4179 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4180 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4182 uint8x16_t result;
4183 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4184 : "=w"(result)
4185 : "0"(a), "w"(b), "w"(c)
4186 : /* No clobbers */);
4187 return result;
4190 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4191 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4193 uint16x8_t result;
4194 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4195 : "=w"(result)
4196 : "0"(a), "w"(b), "w"(c)
4197 : /* No clobbers */);
4198 return result;
4201 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4202 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4204 uint32x4_t result;
4205 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4206 : "=w"(result)
4207 : "0"(a), "w"(b), "w"(c)
4208 : /* No clobbers */);
4209 return result;
/* vabd_<type>: per-lane absolute difference |a - b| on 64-bit vectors
   (FABD / SABD / UABD).  Pure function of its inputs: a and b are
   "w"-class (SIMD/FP register) inputs, result is "=w" output.  */
4212 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4213 vabd_f32 (float32x2_t a, float32x2_t b)
4215 float32x2_t result;
4216 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4217 : "=w"(result)
4218 : "w"(a), "w"(b)
4219 : /* No clobbers */);
4220 return result;
4223 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4224 vabd_s8 (int8x8_t a, int8x8_t b)
4226 int8x8_t result;
4227 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4228 : "=w"(result)
4229 : "w"(a), "w"(b)
4230 : /* No clobbers */);
4231 return result;
4234 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4235 vabd_s16 (int16x4_t a, int16x4_t b)
4237 int16x4_t result;
4238 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4239 : "=w"(result)
4240 : "w"(a), "w"(b)
4241 : /* No clobbers */);
4242 return result;
4245 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4246 vabd_s32 (int32x2_t a, int32x2_t b)
4248 int32x2_t result;
4249 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4250 : "=w"(result)
4251 : "w"(a), "w"(b)
4252 : /* No clobbers */);
4253 return result;
4256 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4257 vabd_u8 (uint8x8_t a, uint8x8_t b)
4259 uint8x8_t result;
4260 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4261 : "=w"(result)
4262 : "w"(a), "w"(b)
4263 : /* No clobbers */);
4264 return result;
4267 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4268 vabd_u16 (uint16x4_t a, uint16x4_t b)
4270 uint16x4_t result;
4271 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4272 : "=w"(result)
4273 : "w"(a), "w"(b)
4274 : /* No clobbers */);
4275 return result;
4278 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4279 vabd_u32 (uint32x2_t a, uint32x2_t b)
4281 uint32x2_t result;
4282 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4283 : "=w"(result)
4284 : "w"(a), "w"(b)
4285 : /* No clobbers */);
4286 return result;
/* vabdd_f64: scalar double-precision absolute difference (FABD on the
   D-sized scalar registers, %dN operand modifiers).  */
4289 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4290 vabdd_f64 (float64_t a, float64_t b)
4292 float64_t result;
4293 __asm__ ("fabd %d0, %d1, %d2"
4294 : "=w"(result)
4295 : "w"(a), "w"(b)
4296 : /* No clobbers */);
4297 return result;
/* vabdl_high_<type>: widening absolute difference of the UPPER halves
   of two 128-bit vectors (SABDL2/UABDL2); each result lane is twice
   the width of the source lanes.  */
4300 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4301 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4303 int16x8_t result;
4304 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4305 : "=w"(result)
4306 : "w"(a), "w"(b)
4307 : /* No clobbers */);
4308 return result;
4311 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4312 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4314 int32x4_t result;
4315 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4316 : "=w"(result)
4317 : "w"(a), "w"(b)
4318 : /* No clobbers */);
4319 return result;
4322 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4323 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4325 int64x2_t result;
4326 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4327 : "=w"(result)
4328 : "w"(a), "w"(b)
4329 : /* No clobbers */);
4330 return result;
4333 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4334 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4336 uint16x8_t result;
4337 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4338 : "=w"(result)
4339 : "w"(a), "w"(b)
4340 : /* No clobbers */);
4341 return result;
4344 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4345 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4347 uint32x4_t result;
4348 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4349 : "=w"(result)
4350 : "w"(a), "w"(b)
4351 : /* No clobbers */);
4352 return result;
4355 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4356 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4358 uint64x2_t result;
4359 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4360 : "=w"(result)
4361 : "w"(a), "w"(b)
4362 : /* No clobbers */);
4363 return result;
/* vabdl_<type>: widening absolute difference of two 64-bit vectors
   (SABDL/UABDL); each result lane is twice the source lane width.  */
4366 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4367 vabdl_s8 (int8x8_t a, int8x8_t b)
4369 int16x8_t result;
4370 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4371 : "=w"(result)
4372 : "w"(a), "w"(b)
4373 : /* No clobbers */);
4374 return result;
4377 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4378 vabdl_s16 (int16x4_t a, int16x4_t b)
4380 int32x4_t result;
4381 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4382 : "=w"(result)
4383 : "w"(a), "w"(b)
4384 : /* No clobbers */);
4385 return result;
4388 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4389 vabdl_s32 (int32x2_t a, int32x2_t b)
4391 int64x2_t result;
4392 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4393 : "=w"(result)
4394 : "w"(a), "w"(b)
4395 : /* No clobbers */);
4396 return result;
4399 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4400 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4402 uint16x8_t result;
4403 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4404 : "=w"(result)
4405 : "w"(a), "w"(b)
4406 : /* No clobbers */);
4407 return result;
4410 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4411 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4413 uint32x4_t result;
4414 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4415 : "=w"(result)
4416 : "w"(a), "w"(b)
4417 : /* No clobbers */);
4418 return result;
4421 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4422 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4424 uint64x2_t result;
4425 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4426 : "=w"(result)
4427 : "w"(a), "w"(b)
4428 : /* No clobbers */);
4429 return result;
4432 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4433 vabdq_f32 (float32x4_t a, float32x4_t b)
4435 float32x4_t result;
4436 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4437 : "=w"(result)
4438 : "w"(a), "w"(b)
4439 : /* No clobbers */);
4440 return result;
4443 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4444 vabdq_f64 (float64x2_t a, float64x2_t b)
4446 float64x2_t result;
4447 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4448 : "=w"(result)
4449 : "w"(a), "w"(b)
4450 : /* No clobbers */);
4451 return result;
4454 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4455 vabdq_s8 (int8x16_t a, int8x16_t b)
4457 int8x16_t result;
4458 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4459 : "=w"(result)
4460 : "w"(a), "w"(b)
4461 : /* No clobbers */);
4462 return result;
4465 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4466 vabdq_s16 (int16x8_t a, int16x8_t b)
4468 int16x8_t result;
4469 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4470 : "=w"(result)
4471 : "w"(a), "w"(b)
4472 : /* No clobbers */);
4473 return result;
4476 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4477 vabdq_s32 (int32x4_t a, int32x4_t b)
4479 int32x4_t result;
4480 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4481 : "=w"(result)
4482 : "w"(a), "w"(b)
4483 : /* No clobbers */);
4484 return result;
4487 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4488 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4490 uint8x16_t result;
4491 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4492 : "=w"(result)
4493 : "w"(a), "w"(b)
4494 : /* No clobbers */);
4495 return result;
4498 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4499 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4501 uint16x8_t result;
4502 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4503 : "=w"(result)
4504 : "w"(a), "w"(b)
4505 : /* No clobbers */);
4506 return result;
4509 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4510 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4512 uint32x4_t result;
4513 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4514 : "=w"(result)
4515 : "w"(a), "w"(b)
4516 : /* No clobbers */);
4517 return result;
4520 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4521 vabds_f32 (float32_t a, float32_t b)
4523 float32_t result;
4524 __asm__ ("fabd %s0, %s1, %s2"
4525 : "=w"(result)
4526 : "w"(a), "w"(b)
4527 : /* No clobbers */);
4528 return result;
4531 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4532 vaddlv_s8 (int8x8_t a)
4534 int16_t result;
4535 __asm__ ("saddlv %h0,%1.8b"
4536 : "=w"(result)
4537 : "w"(a)
4538 : /* No clobbers */);
4539 return result;
4542 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4543 vaddlv_s16 (int16x4_t a)
4545 int32_t result;
4546 __asm__ ("saddlv %s0,%1.4h"
4547 : "=w"(result)
4548 : "w"(a)
4549 : /* No clobbers */);
4550 return result;
4553 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4554 vaddlv_u8 (uint8x8_t a)
4556 uint16_t result;
4557 __asm__ ("uaddlv %h0,%1.8b"
4558 : "=w"(result)
4559 : "w"(a)
4560 : /* No clobbers */);
4561 return result;
4564 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4565 vaddlv_u16 (uint16x4_t a)
4567 uint32_t result;
4568 __asm__ ("uaddlv %s0,%1.4h"
4569 : "=w"(result)
4570 : "w"(a)
4571 : /* No clobbers */);
4572 return result;
4575 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4576 vaddlvq_s8 (int8x16_t a)
4578 int16_t result;
4579 __asm__ ("saddlv %h0,%1.16b"
4580 : "=w"(result)
4581 : "w"(a)
4582 : /* No clobbers */);
4583 return result;
4586 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
4587 vaddlvq_s16 (int16x8_t a)
4589 int32_t result;
4590 __asm__ ("saddlv %s0,%1.8h"
4591 : "=w"(result)
4592 : "w"(a)
4593 : /* No clobbers */);
4594 return result;
4597 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
4598 vaddlvq_s32 (int32x4_t a)
4600 int64_t result;
4601 __asm__ ("saddlv %d0,%1.4s"
4602 : "=w"(result)
4603 : "w"(a)
4604 : /* No clobbers */);
4605 return result;
4608 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
4609 vaddlvq_u8 (uint8x16_t a)
4611 uint16_t result;
4612 __asm__ ("uaddlv %h0,%1.16b"
4613 : "=w"(result)
4614 : "w"(a)
4615 : /* No clobbers */);
4616 return result;
4619 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
4620 vaddlvq_u16 (uint16x8_t a)
4622 uint32_t result;
4623 __asm__ ("uaddlv %s0,%1.8h"
4624 : "=w"(result)
4625 : "w"(a)
4626 : /* No clobbers */);
4627 return result;
4630 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
4631 vaddlvq_u32 (uint32x4_t a)
4633 uint64_t result;
4634 __asm__ ("uaddlv %d0,%1.4s"
4635 : "=w"(result)
4636 : "w"(a)
4637 : /* No clobbers */);
4638 return result;
4641 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4642 vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
4644 float32x2_t result;
4645 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4646 : "=w"(result)
4647 : "0"(a), "w"(b), "w"(c)
4648 : /* No clobbers */);
4649 return result;
4652 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4653 vbsl_p8 (uint8x8_t a, poly8x8_t b, poly8x8_t c)
4655 poly8x8_t result;
4656 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4657 : "=w"(result)
4658 : "0"(a), "w"(b), "w"(c)
4659 : /* No clobbers */);
4660 return result;
4663 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4664 vbsl_p16 (uint16x4_t a, poly16x4_t b, poly16x4_t c)
4666 poly16x4_t result;
4667 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4668 : "=w"(result)
4669 : "0"(a), "w"(b), "w"(c)
4670 : /* No clobbers */);
4671 return result;
4674 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4675 vbsl_s8 (uint8x8_t a, int8x8_t b, int8x8_t c)
4677 int8x8_t result;
4678 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4679 : "=w"(result)
4680 : "0"(a), "w"(b), "w"(c)
4681 : /* No clobbers */);
4682 return result;
4685 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4686 vbsl_s16 (uint16x4_t a, int16x4_t b, int16x4_t c)
4688 int16x4_t result;
4689 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4690 : "=w"(result)
4691 : "0"(a), "w"(b), "w"(c)
4692 : /* No clobbers */);
4693 return result;
4696 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4697 vbsl_s32 (uint32x2_t a, int32x2_t b, int32x2_t c)
4699 int32x2_t result;
4700 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4701 : "=w"(result)
4702 : "0"(a), "w"(b), "w"(c)
4703 : /* No clobbers */);
4704 return result;
4707 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4708 vbsl_s64 (uint64x1_t a, int64x1_t b, int64x1_t c)
4710 int64x1_t result;
4711 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4712 : "=w"(result)
4713 : "0"(a), "w"(b), "w"(c)
4714 : /* No clobbers */);
4715 return result;
4718 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4719 vbsl_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4721 uint8x8_t result;
4722 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4723 : "=w"(result)
4724 : "0"(a), "w"(b), "w"(c)
4725 : /* No clobbers */);
4726 return result;
4729 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4730 vbsl_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4732 uint16x4_t result;
4733 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4734 : "=w"(result)
4735 : "0"(a), "w"(b), "w"(c)
4736 : /* No clobbers */);
4737 return result;
4740 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4741 vbsl_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4743 uint32x2_t result;
4744 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4745 : "=w"(result)
4746 : "0"(a), "w"(b), "w"(c)
4747 : /* No clobbers */);
4748 return result;
4751 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4752 vbsl_u64 (uint64x1_t a, uint64x1_t b, uint64x1_t c)
4754 uint64x1_t result;
4755 __asm__ ("bsl %0.8b, %2.8b, %3.8b"
4756 : "=w"(result)
4757 : "0"(a), "w"(b), "w"(c)
4758 : /* No clobbers */);
4759 return result;
4762 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4763 vbslq_f32 (uint32x4_t a, float32x4_t b, float32x4_t c)
4765 float32x4_t result;
4766 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4767 : "=w"(result)
4768 : "0"(a), "w"(b), "w"(c)
4769 : /* No clobbers */);
4770 return result;
4773 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4774 vbslq_f64 (uint64x2_t a, float64x2_t b, float64x2_t c)
4776 float64x2_t result;
4777 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4778 : "=w"(result)
4779 : "0"(a), "w"(b), "w"(c)
4780 : /* No clobbers */);
4781 return result;
4784 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4785 vbslq_p8 (uint8x16_t a, poly8x16_t b, poly8x16_t c)
4787 poly8x16_t result;
4788 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4789 : "=w"(result)
4790 : "0"(a), "w"(b), "w"(c)
4791 : /* No clobbers */);
4792 return result;
4795 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4796 vbslq_p16 (uint16x8_t a, poly16x8_t b, poly16x8_t c)
4798 poly16x8_t result;
4799 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4800 : "=w"(result)
4801 : "0"(a), "w"(b), "w"(c)
4802 : /* No clobbers */);
4803 return result;
4806 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4807 vbslq_s8 (uint8x16_t a, int8x16_t b, int8x16_t c)
4809 int8x16_t result;
4810 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4811 : "=w"(result)
4812 : "0"(a), "w"(b), "w"(c)
4813 : /* No clobbers */);
4814 return result;
4817 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4818 vbslq_s16 (uint16x8_t a, int16x8_t b, int16x8_t c)
4820 int16x8_t result;
4821 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4822 : "=w"(result)
4823 : "0"(a), "w"(b), "w"(c)
4824 : /* No clobbers */);
4825 return result;
4828 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4829 vbslq_s32 (uint32x4_t a, int32x4_t b, int32x4_t c)
4831 int32x4_t result;
4832 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4833 : "=w"(result)
4834 : "0"(a), "w"(b), "w"(c)
4835 : /* No clobbers */);
4836 return result;
4839 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4840 vbslq_s64 (uint64x2_t a, int64x2_t b, int64x2_t c)
4842 int64x2_t result;
4843 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4844 : "=w"(result)
4845 : "0"(a), "w"(b), "w"(c)
4846 : /* No clobbers */);
4847 return result;
4850 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4851 vbslq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4853 uint8x16_t result;
4854 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4855 : "=w"(result)
4856 : "0"(a), "w"(b), "w"(c)
4857 : /* No clobbers */);
4858 return result;
4861 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4862 vbslq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4864 uint16x8_t result;
4865 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4866 : "=w"(result)
4867 : "0"(a), "w"(b), "w"(c)
4868 : /* No clobbers */);
4869 return result;
4872 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4873 vbslq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4875 uint32x4_t result;
4876 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4877 : "=w"(result)
4878 : "0"(a), "w"(b), "w"(c)
4879 : /* No clobbers */);
4880 return result;
4883 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4884 vbslq_u64 (uint64x2_t a, uint64x2_t b, uint64x2_t c)
4886 uint64x2_t result;
4887 __asm__ ("bsl %0.16b, %2.16b, %3.16b"
4888 : "=w"(result)
4889 : "0"(a), "w"(b), "w"(c)
4890 : /* No clobbers */);
4891 return result;
4894 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4895 vcls_s8 (int8x8_t a)
4897 int8x8_t result;
4898 __asm__ ("cls %0.8b,%1.8b"
4899 : "=w"(result)
4900 : "w"(a)
4901 : /* No clobbers */);
4902 return result;
4905 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4906 vcls_s16 (int16x4_t a)
4908 int16x4_t result;
4909 __asm__ ("cls %0.4h,%1.4h"
4910 : "=w"(result)
4911 : "w"(a)
4912 : /* No clobbers */);
4913 return result;
4916 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4917 vcls_s32 (int32x2_t a)
4919 int32x2_t result;
4920 __asm__ ("cls %0.2s,%1.2s"
4921 : "=w"(result)
4922 : "w"(a)
4923 : /* No clobbers */);
4924 return result;
4927 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4928 vclsq_s8 (int8x16_t a)
4930 int8x16_t result;
4931 __asm__ ("cls %0.16b,%1.16b"
4932 : "=w"(result)
4933 : "w"(a)
4934 : /* No clobbers */);
4935 return result;
4938 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4939 vclsq_s16 (int16x8_t a)
4941 int16x8_t result;
4942 __asm__ ("cls %0.8h,%1.8h"
4943 : "=w"(result)
4944 : "w"(a)
4945 : /* No clobbers */);
4946 return result;
4949 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4950 vclsq_s32 (int32x4_t a)
4952 int32x4_t result;
4953 __asm__ ("cls %0.4s,%1.4s"
4954 : "=w"(result)
4955 : "w"(a)
4956 : /* No clobbers */);
4957 return result;
4960 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4961 vclz_s8 (int8x8_t a)
4963 int8x8_t result;
4964 __asm__ ("clz %0.8b,%1.8b"
4965 : "=w"(result)
4966 : "w"(a)
4967 : /* No clobbers */);
4968 return result;
4971 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4972 vclz_s16 (int16x4_t a)
4974 int16x4_t result;
4975 __asm__ ("clz %0.4h,%1.4h"
4976 : "=w"(result)
4977 : "w"(a)
4978 : /* No clobbers */);
4979 return result;
4982 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4983 vclz_s32 (int32x2_t a)
4985 int32x2_t result;
4986 __asm__ ("clz %0.2s,%1.2s"
4987 : "=w"(result)
4988 : "w"(a)
4989 : /* No clobbers */);
4990 return result;
4993 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4994 vclz_u8 (uint8x8_t a)
4996 uint8x8_t result;
4997 __asm__ ("clz %0.8b,%1.8b"
4998 : "=w"(result)
4999 : "w"(a)
5000 : /* No clobbers */);
5001 return result;
5004 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5005 vclz_u16 (uint16x4_t a)
5007 uint16x4_t result;
5008 __asm__ ("clz %0.4h,%1.4h"
5009 : "=w"(result)
5010 : "w"(a)
5011 : /* No clobbers */);
5012 return result;
5015 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5016 vclz_u32 (uint32x2_t a)
5018 uint32x2_t result;
5019 __asm__ ("clz %0.2s,%1.2s"
5020 : "=w"(result)
5021 : "w"(a)
5022 : /* No clobbers */);
5023 return result;
5026 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5027 vclzq_s8 (int8x16_t a)
5029 int8x16_t result;
5030 __asm__ ("clz %0.16b,%1.16b"
5031 : "=w"(result)
5032 : "w"(a)
5033 : /* No clobbers */);
5034 return result;
5037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5038 vclzq_s16 (int16x8_t a)
5040 int16x8_t result;
5041 __asm__ ("clz %0.8h,%1.8h"
5042 : "=w"(result)
5043 : "w"(a)
5044 : /* No clobbers */);
5045 return result;
5048 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5049 vclzq_s32 (int32x4_t a)
5051 int32x4_t result;
5052 __asm__ ("clz %0.4s,%1.4s"
5053 : "=w"(result)
5054 : "w"(a)
5055 : /* No clobbers */);
5056 return result;
5059 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5060 vclzq_u8 (uint8x16_t a)
5062 uint8x16_t result;
5063 __asm__ ("clz %0.16b,%1.16b"
5064 : "=w"(result)
5065 : "w"(a)
5066 : /* No clobbers */);
5067 return result;
5070 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
5071 vclzq_u16 (uint16x8_t a)
5073 uint16x8_t result;
5074 __asm__ ("clz %0.8h,%1.8h"
5075 : "=w"(result)
5076 : "w"(a)
5077 : /* No clobbers */);
5078 return result;
5081 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
5082 vclzq_u32 (uint32x4_t a)
5084 uint32x4_t result;
5085 __asm__ ("clz %0.4s,%1.4s"
5086 : "=w"(result)
5087 : "w"(a)
5088 : /* No clobbers */);
5089 return result;
5092 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5093 vcnt_p8 (poly8x8_t a)
5095 poly8x8_t result;
5096 __asm__ ("cnt %0.8b,%1.8b"
5097 : "=w"(result)
5098 : "w"(a)
5099 : /* No clobbers */);
5100 return result;
5103 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5104 vcnt_s8 (int8x8_t a)
5106 int8x8_t result;
5107 __asm__ ("cnt %0.8b,%1.8b"
5108 : "=w"(result)
5109 : "w"(a)
5110 : /* No clobbers */);
5111 return result;
5114 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5115 vcnt_u8 (uint8x8_t a)
5117 uint8x8_t result;
5118 __asm__ ("cnt %0.8b,%1.8b"
5119 : "=w"(result)
5120 : "w"(a)
5121 : /* No clobbers */);
5122 return result;
5125 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5126 vcntq_p8 (poly8x16_t a)
5128 poly8x16_t result;
5129 __asm__ ("cnt %0.16b,%1.16b"
5130 : "=w"(result)
5131 : "w"(a)
5132 : /* No clobbers */);
5133 return result;
5136 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5137 vcntq_s8 (int8x16_t a)
5139 int8x16_t result;
5140 __asm__ ("cnt %0.16b,%1.16b"
5141 : "=w"(result)
5142 : "w"(a)
5143 : /* No clobbers */);
5144 return result;
5147 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5148 vcntq_u8 (uint8x16_t a)
5150 uint8x16_t result;
5151 __asm__ ("cnt %0.16b,%1.16b"
5152 : "=w"(result)
5153 : "w"(a)
5154 : /* No clobbers */);
5155 return result;
/* vcopyq_lane_*: insert lane D of vector C into lane B of vector A
   (INS).  Implemented as macros because the lane numbers must be
   immediates; internal names are __-prefixed to avoid clashing with
   user macros.  */

#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __c = (c);                                           \
       float32x4_t __a = (a);                                           \
       float32x4_t __result;                                            \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __c = (c);                                           \
       float64x2_t __a = (a);                                           \
       float64x2_t __result;                                            \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t __c = (c);                                            \
       poly8x16_t __a = (a);                                            \
       poly8x16_t __result;                                             \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t __c = (c);                                            \
       poly16x8_t __a = (a);                                            \
       poly16x8_t __result;                                             \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t __c = (c);                                             \
       int8x16_t __a = (a);                                             \
       int8x16_t __result;                                              \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c = (c);                                             \
       int16x8_t __a = (a);                                             \
       int16x8_t __result;                                              \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c = (c);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __c = (c);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t __c = (c);                                            \
       uint8x16_t __a = (a);                                            \
       uint8x16_t __result;                                             \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c = (c);                                            \
       uint16x8_t __a = (a);                                            \
       uint16x8_t __result;                                             \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c = (c);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __c = (c);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
5314 /* vcvt_f16_f32 not supported */
5316 /* vcvt_f32_f16 not supported */
5318 /* vcvt_high_f16_f32 not supported */
5320 /* vcvt_high_f32_f16 not supported */
5322 static float32x2_t vdup_n_f32 (float32_t);
/* vcvt_n_*: fixed-point <-> float conversion on 2-element vectors
   with B fractional bits (SCVTF/UCVTF/FCVTZS/FCVTZU #imm); macros
   because B must be an immediate.  */

#define vcvt_n_f32_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __a = (a);                                             \
       float32x2_t __result;                                            \
       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_f32_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __a = (a);                                            \
       float32x2_t __result;                                            \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_s32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __a = (a);                                           \
       int32x2_t __result;                                              \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_u32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __a = (a);                                           \
       uint32x2_t __result;                                             \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vcvtd_n_*: scalar 64-bit fixed-point <-> double conversion with B
   fractional bits (SCVTF/UCVTF/FCVTZS/FCVTZU on D registers).

   Fix: in each macro the `result' variable was declared with the
   SOURCE type rather than the DESTINATION type named in the intrinsic
   (e.g. vcvtd_n_f64_s64 declared int64_t result), so the statement
   expression yielded a value of the wrong type.  The result now uses
   the destination type from the intrinsic's name, matching the ACLE
   specification; the input temporary keeps the source type.  */

#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t a_ = (a);                                                \
       float64_t result;                                                \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t a_ = (a);                                               \
       float64_t result;                                                \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       int64_t result;                                                  \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t a_ = (a);                                              \
       uint64_t result;                                                 \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vcvtq_n_*: fixed-point <-> float conversion on full quad vectors
   with B fractional bits; macros because B must be an immediate.  */

#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __a = (a);                                             \
       float32x4_t __result;                                            \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __a = (a);                                            \
       float32x4_t __result;                                            \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __a = (a);                                             \
       float64x2_t __result;                                            \
       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __a = (a);                                            \
       float64x2_t __result;                                            \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __a = (a);                                           \
       int32x4_t __result;                                              \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __a = (a);                                           \
       int64x2_t __result;                                              \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __a = (a);                                           \
       uint32x4_t __result;                                             \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __a = (a);                                           \
       uint64x2_t __result;                                             \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vcvts_n_*: scalar 32-bit fixed-point <-> float conversion with B
   fractional bits (SCVTF/UCVTF/FCVTZS/FCVTZU on S registers).

   Fix: as with the vcvtd_n_* group, each macro declared `result' with
   the SOURCE type instead of the DESTINATION type named in the
   intrinsic (e.g. vcvts_n_f32_s32 declared int32_t result), so the
   statement expression yielded a value of the wrong type.  The result
   now uses the destination type, per the ACLE specification.  */

#define vcvts_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32_t a_ = (a);                                                \
       float32_t result;                                                \
       __asm__ ("scvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32_t a_ = (a);                                               \
       float32_t result;                                                \
       __asm__ ("ucvtf %s0,%s1,%2"                                      \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       int32_t result;                                                  \
       __asm__ ("fcvtzs %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vcvts_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t a_ = (a);                                              \
       uint32_t result;                                                 \
       __asm__ ("fcvtzu %s0,%s1,%2"                                     \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
5564 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5565 vcvtx_f32_f64 (float64x2_t a)
5567 float32x2_t result;
5568 __asm__ ("fcvtxn %0.2s,%1.2d"
5569 : "=w"(result)
5570 : "w"(a)
5571 : /* No clobbers */);
5572 return result;
5575 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5576 vcvtx_high_f32_f64 (float64x2_t a)
5578 float32x4_t result;
5579 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5580 : "=w"(result)
5581 : "w"(a)
5582 : /* No clobbers */);
5583 return result;
5586 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5587 vcvtxd_f32_f64 (float64_t a)
5589 float32_t result;
5590 __asm__ ("fcvtxn %s0,%d1"
5591 : "=w"(result)
5592 : "w"(a)
5593 : /* No clobbers */);
5594 return result;
/* vdup_lane_*: broadcast lane B of vector A across a 64-bit vector
   (DUP; INS for the single-lane 64-bit element types).  Macros
   because the lane number must be an immediate.  */

#define vdup_lane_f32(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __a = (a);                                           \
       float32x2_t __result;                                            \
       __asm__ ("dup %0.2s,%1.s[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_p8(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t __a = (a);                                             \
       poly8x8_t __result;                                              \
       __asm__ ("dup %0.8b,%1.b[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_p16(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t __a = (a);                                            \
       poly16x4_t __result;                                             \
       __asm__ ("dup %0.4h,%1.h[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_s8(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t __a = (a);                                              \
       int8x8_t __result;                                               \
       __asm__ ("dup %0.8b,%1.b[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_s16(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t __a = (a);                                             \
       int16x4_t __result;                                              \
       __asm__ ("dup %0.4h,%1.h[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_s32(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __a = (a);                                             \
       int32x2_t __result;                                              \
       __asm__ ("dup %0.2s,%1.s[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_s64(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t __a = (a);                                             \
       int64x1_t __result;                                              \
       __asm__ ("ins %0.d[0],%1.d[%2]"                                  \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_u8(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t __a = (a);                                             \
       uint8x8_t __result;                                              \
       __asm__ ("dup %0.8b,%1.b[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_u16(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t __a = (a);                                            \
       uint16x4_t __result;                                             \
       __asm__ ("dup %0.4h,%1.h[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_u32(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __a = (a);                                            \
       uint32x2_t __result;                                             \
       __asm__ ("dup %0.2s,%1.s[%2]"                                    \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vdup_lane_u64(a, b)                                             \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t __a = (a);                                            \
       uint64x1_t __result;                                             \
       __asm__ ("ins %0.d[0],%1.d[%2]"                                  \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
5729 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5730 vdup_n_f32 (float32_t a)
5732 float32x2_t result;
5733 __asm__ ("dup %0.2s, %w1"
5734 : "=w"(result)
5735 : "r"(a)
5736 : /* No clobbers */);
5737 return result;
5740 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5741 vdup_n_p8 (uint32_t a)
5743 poly8x8_t result;
5744 __asm__ ("dup %0.8b,%w1"
5745 : "=w"(result)
5746 : "r"(a)
5747 : /* No clobbers */);
5748 return result;
5751 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5752 vdup_n_p16 (uint32_t a)
5754 poly16x4_t result;
5755 __asm__ ("dup %0.4h,%w1"
5756 : "=w"(result)
5757 : "r"(a)
5758 : /* No clobbers */);
5759 return result;
5762 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5763 vdup_n_s8 (int32_t a)
5765 int8x8_t result;
5766 __asm__ ("dup %0.8b,%w1"
5767 : "=w"(result)
5768 : "r"(a)
5769 : /* No clobbers */);
5770 return result;
5773 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5774 vdup_n_s16 (int32_t a)
5776 int16x4_t result;
5777 __asm__ ("dup %0.4h,%w1"
5778 : "=w"(result)
5779 : "r"(a)
5780 : /* No clobbers */);
5781 return result;
5784 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5785 vdup_n_s32 (int32_t a)
5787 int32x2_t result;
5788 __asm__ ("dup %0.2s,%w1"
5789 : "=w"(result)
5790 : "r"(a)
5791 : /* No clobbers */);
5792 return result;
5795 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5796 vdup_n_s64 (int64_t a)
5798 int64x1_t result;
5799 __asm__ ("ins %0.d[0],%x1"
5800 : "=w"(result)
5801 : "r"(a)
5802 : /* No clobbers */);
5803 return result;
5806 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5807 vdup_n_u8 (uint32_t a)
5809 uint8x8_t result;
5810 __asm__ ("dup %0.8b,%w1"
5811 : "=w"(result)
5812 : "r"(a)
5813 : /* No clobbers */);
5814 return result;
5817 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5818 vdup_n_u16 (uint32_t a)
5820 uint16x4_t result;
5821 __asm__ ("dup %0.4h,%w1"
5822 : "=w"(result)
5823 : "r"(a)
5824 : /* No clobbers */);
5825 return result;
5828 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5829 vdup_n_u32 (uint32_t a)
5831 uint32x2_t result;
5832 __asm__ ("dup %0.2s,%w1"
5833 : "=w"(result)
5834 : "r"(a)
5835 : /* No clobbers */);
5836 return result;
5839 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5840 vdup_n_u64 (uint64_t a)
5842 uint64x1_t result;
5843 __asm__ ("ins %0.d[0],%x1"
5844 : "=w"(result)
5845 : "r"(a)
5846 : /* No clobbers */);
5847 return result;
/* Extract lane B of A as a scalar double.
   NOTE(review): this takes the 128-bit float64x2_t, i.e. it matches what
   ACLE calls vdupd_laneq_f64 rather than vdupd_lane_f64 (which takes
   float64x1_t) — confirm against callers before changing.  */
#define vdupd_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x2_t __a = (a); \
       float64_t __result; \
       __asm__ ("dup %d0, %1.d[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })
/* vdupq_lane_*: broadcast lane B of a 64-bit vector A into every lane of
   a 128-bit result.  B must be a constant lane index.  */

#define vdupq_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x2_t __a = (a); \
       float32x4_t __result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_f64(a, b) \
  __extension__ \
    ({ \
       float64x1_t __a = (a); \
       float64x2_t __result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_p8(a, b) \
  __extension__ \
    ({ \
       poly8x8_t __a = (a); \
       poly8x16_t __result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_p16(a, b) \
  __extension__ \
    ({ \
       poly16x4_t __a = (a); \
       poly16x8_t __result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_s8(a, b) \
  __extension__ \
    ({ \
       int8x8_t __a = (a); \
       int8x16_t __result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_s16(a, b) \
  __extension__ \
    ({ \
       int16x4_t __a = (a); \
       int16x8_t __result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_s32(a, b) \
  __extension__ \
    ({ \
       int32x2_t __a = (a); \
       int32x4_t __result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_s64(a, b) \
  __extension__ \
    ({ \
       int64x1_t __a = (a); \
       int64x2_t __result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_u8(a, b) \
  __extension__ \
    ({ \
       uint8x8_t __a = (a); \
       uint8x16_t __result; \
       __asm__ ("dup %0.16b,%1.b[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_u16(a, b) \
  __extension__ \
    ({ \
       uint16x4_t __a = (a); \
       uint16x8_t __result; \
       __asm__ ("dup %0.8h,%1.h[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_u32(a, b) \
  __extension__ \
    ({ \
       uint32x2_t __a = (a); \
       uint32x4_t __result; \
       __asm__ ("dup %0.4s,%1.s[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })

#define vdupq_lane_u64(a, b) \
  __extension__ \
    ({ \
       uint64x1_t __a = (a); \
       uint64x2_t __result; \
       __asm__ ("dup %0.2d,%1.d[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })
6006 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6007 vdupq_n_f32 (float32_t a)
6009 float32x4_t result;
6010 __asm__ ("dup %0.4s, %w1"
6011 : "=w"(result)
6012 : "r"(a)
6013 : /* No clobbers */);
6014 return result;
6017 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6018 vdupq_n_f64 (float64_t a)
6020 float64x2_t result;
6021 __asm__ ("dup %0.2d, %x1"
6022 : "=w"(result)
6023 : "r"(a)
6024 : /* No clobbers */);
6025 return result;
6028 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
6029 vdupq_n_p8 (uint32_t a)
6031 poly8x16_t result;
6032 __asm__ ("dup %0.16b,%w1"
6033 : "=w"(result)
6034 : "r"(a)
6035 : /* No clobbers */);
6036 return result;
6039 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
6040 vdupq_n_p16 (uint32_t a)
6042 poly16x8_t result;
6043 __asm__ ("dup %0.8h,%w1"
6044 : "=w"(result)
6045 : "r"(a)
6046 : /* No clobbers */);
6047 return result;
6050 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6051 vdupq_n_s8 (int32_t a)
6053 int8x16_t result;
6054 __asm__ ("dup %0.16b,%w1"
6055 : "=w"(result)
6056 : "r"(a)
6057 : /* No clobbers */);
6058 return result;
6061 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6062 vdupq_n_s16 (int32_t a)
6064 int16x8_t result;
6065 __asm__ ("dup %0.8h,%w1"
6066 : "=w"(result)
6067 : "r"(a)
6068 : /* No clobbers */);
6069 return result;
6072 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
6073 vdupq_n_s32 (int32_t a)
6075 int32x4_t result;
6076 __asm__ ("dup %0.4s,%w1"
6077 : "=w"(result)
6078 : "r"(a)
6079 : /* No clobbers */);
6080 return result;
6083 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
6084 vdupq_n_s64 (int64_t a)
6086 int64x2_t result;
6087 __asm__ ("dup %0.2d,%x1"
6088 : "=w"(result)
6089 : "r"(a)
6090 : /* No clobbers */);
6091 return result;
6094 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
6095 vdupq_n_u8 (uint32_t a)
6097 uint8x16_t result;
6098 __asm__ ("dup %0.16b,%w1"
6099 : "=w"(result)
6100 : "r"(a)
6101 : /* No clobbers */);
6102 return result;
6105 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
6106 vdupq_n_u16 (uint32_t a)
6108 uint16x8_t result;
6109 __asm__ ("dup %0.8h,%w1"
6110 : "=w"(result)
6111 : "r"(a)
6112 : /* No clobbers */);
6113 return result;
6116 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
6117 vdupq_n_u32 (uint32_t a)
6119 uint32x4_t result;
6120 __asm__ ("dup %0.4s,%w1"
6121 : "=w"(result)
6122 : "r"(a)
6123 : /* No clobbers */);
6124 return result;
6127 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
6128 vdupq_n_u64 (uint64_t a)
6130 uint64x2_t result;
6131 __asm__ ("dup %0.2d,%x1"
6132 : "=w"(result)
6133 : "r"(a)
6134 : /* No clobbers */);
6135 return result;
/* Extract lane B of A as a scalar float.
   NOTE(review): this takes the 128-bit float32x4_t, i.e. it matches what
   ACLE calls vdups_laneq_f32 rather than vdups_lane_f32 (which takes
   float32x2_t) — confirm against callers before changing.  */
#define vdups_lane_f32(a, b) \
  __extension__ \
    ({ \
       float32x4_t __a = (a); \
       float32_t __result; \
       __asm__ ("dup %s0, %1.s[%2]" \
                : "=w"(__result) \
                : "w"(__a), "i"(b) \
                : /* No clobbers */); \
       __result; \
     })
/* vext_*: extract a 64-bit vector from the pair {A,B}, starting at
   element C of A.  The byte-granular EXT immediate is C scaled by the
   element size; B is evaluated before A to preserve the original
   argument-evaluation order.  */

#define vext_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t __b = (b); \
       float32x2_t __a = (a); \
       float32x2_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t __b = (b); \
       float64x1_t __a = (a); \
       float64x1_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t __b = (b); \
       poly8x8_t __a = (a); \
       poly8x8_t __result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t __b = (b); \
       poly16x4_t __a = (a); \
       poly16x4_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t __b = (b); \
       int8x8_t __a = (a); \
       int8x8_t __result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t __b = (b); \
       int16x4_t __a = (a); \
       int16x4_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t __b = (b); \
       int32x2_t __a = (a); \
       int32x2_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t __b = (b); \
       int64x1_t __a = (a); \
       int64x1_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t __b = (b); \
       uint8x8_t __a = (a); \
       uint8x8_t __result; \
       __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t __b = (b); \
       uint16x4_t __a = (a); \
       uint16x4_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t __b = (b); \
       uint32x2_t __a = (a); \
       uint32x2_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vext_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t __b = (b); \
       uint64x1_t __a = (a); \
       uint64x1_t __result; \
       __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })
/* vextq_*: 128-bit variants of vext_* — extract a vector from the pair
   {A,B} starting at element C of A; immediate is C scaled by element
   size.  */

#define vextq_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t __b = (b); \
       float32x4_t __a = (a); \
       float32x4_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t __b = (b); \
       float64x2_t __a = (a); \
       float64x2_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t __b = (b); \
       poly8x16_t __a = (a); \
       poly8x16_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t __b = (b); \
       poly16x8_t __a = (a); \
       poly16x8_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t __b = (b); \
       int8x16_t __a = (a); \
       int8x16_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t __b = (b); \
       int16x8_t __a = (a); \
       int16x8_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t __b = (b); \
       int32x4_t __a = (a); \
       int32x4_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t __b = (b); \
       int64x2_t __a = (a); \
       int64x2_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t __b = (b); \
       uint8x16_t __a = (a); \
       uint8x16_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t __b = (b); \
       uint16x8_t __a = (a); \
       uint16x8_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t __b = (b); \
       uint32x4_t __a = (a); \
       uint32x4_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })

#define vextq_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t __b = (b); \
       uint64x2_t __a = (a); \
       uint64x2_t __result; \
       __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })
6462 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6463 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6465 float32x2_t result;
6466 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
6467 : "=w"(result)
6468 : "0"(a), "w"(b), "w"(c)
6469 : /* No clobbers */);
6470 return result;
/* Fused multiply-add by lane: a + b * c[d], per lane.  Accumulator tied
   to operand 0; arguments evaluated c, b, a as in the original.  */
#define vfma_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x2_t __c = (c); \
       float32x2_t __b = (b); \
       float32x2_t __a = (a); \
       float32x2_t __result; \
       __asm__ ("fmla %0.2s,%2.2s,%3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d) \
                : /* No clobbers */); \
       __result; \
     })
/* Scalar fused multiply-add by lane.
   NOTE(review): FMLA reads %d0 as its accumulator, but operand 0 is
   declared write-only ("=w") and is not tied to any input, so the
   accumulator value is undefined on entry.  Looks like a latent bug —
   confirm the intended semantics before relying on this macro.  */
#define vfmad_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t __b = (b); \
       float64_t __a = (a); \
       float64_t __result; \
       __asm__ ("fmla %d0,%d1,%2.d[%3]" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })
6500 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6501 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6503 float32x4_t result;
6504 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
6505 : "=w"(result)
6506 : "0"(a), "w"(b), "w"(c)
6507 : /* No clobbers */);
6508 return result;
6511 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6512 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6514 float64x2_t result;
6515 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
6516 : "=w"(result)
6517 : "0"(a), "w"(b), "w"(c)
6518 : /* No clobbers */);
6519 return result;
/* 128-bit fused multiply-add by lane: a + b * c[d], per lane.  */

#define vfmaq_lane_f32(a, b, c, d) \
  __extension__ \
    ({ \
       float32x4_t __c = (c); \
       float32x4_t __b = (b); \
       float32x4_t __a = (a); \
       float32x4_t __result; \
       __asm__ ("fmla %0.4s,%2.4s,%3.s[%4]" \
                : "=w"(__result) \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d) \
                : /* No clobbers */); \
       __result; \
     })

#define vfmaq_lane_f64(a, b, c, d) \
  __extension__ \
    ({ \
       float64x2_t __c = (c); \
       float64x2_t __b = (b); \
       float64x2_t __a = (a); \
       float64x2_t __result; \
       __asm__ ("fmla %0.2d,%2.2d,%3.d[%4]" \
                : "=w"(__result) \
                : "0"(__a), "w"(__b), "w"(__c), "i"(d) \
                : /* No clobbers */); \
       __result; \
     })
/* Scalar fused multiply-add by lane (single precision).
   NOTE(review): as with vfmad_lane_f64, FMLA reads %s0 as accumulator
   while operand 0 is write-only and untied — accumulator is undefined
   on entry.  Confirm intended semantics before use.  */
#define vfmas_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t __b = (b); \
       float32_t __a = (a); \
       float32_t __result; \
       __asm__ ("fmla %s0,%s1,%2.s[%3]" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })
6563 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6564 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
6566 float32x2_t result;
6567 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
6568 : "=w"(result)
6569 : "0"(a), "w"(b), "w"(c)
6570 : /* No clobbers */);
6571 return result;
6574 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6575 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
6577 float32x4_t result;
6578 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
6579 : "=w"(result)
6580 : "0"(a), "w"(b), "w"(c)
6581 : /* No clobbers */);
6582 return result;
6585 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6586 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
6588 float64x2_t result;
6589 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
6590 : "=w"(result)
6591 : "0"(a), "w"(b), "w"(c)
6592 : /* No clobbers */);
6593 return result;
6596 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6597 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
6599 float32x2_t result;
6600 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
6601 : "=w"(result)
6602 : "0"(a), "w"(b), "w"(c)
6603 : /* No clobbers */);
6604 return result;
/* Scalar fused multiply-subtract by lane.
   NOTE(review): FMLS reads %d0 as accumulator while operand 0 is
   write-only and untied — accumulator is undefined on entry.  Confirm
   intended semantics before use.  */
#define vfmsd_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t __b = (b); \
       float64_t __a = (a); \
       float64_t __result; \
       __asm__ ("fmls %d0,%d1,%2.d[%3]" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })
6620 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
6621 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
6623 float32x4_t result;
6624 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
6625 : "=w"(result)
6626 : "0"(a), "w"(b), "w"(c)
6627 : /* No clobbers */);
6628 return result;
6631 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
6632 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
6634 float64x2_t result;
6635 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
6636 : "=w"(result)
6637 : "0"(a), "w"(b), "w"(c)
6638 : /* No clobbers */);
6639 return result;
/* Scalar fused multiply-subtract by lane (single precision).
   NOTE(review): FMLS reads %s0 as accumulator while operand 0 is
   write-only and untied — accumulator is undefined on entry.  Confirm
   intended semantics before use.  */
#define vfmss_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t __b = (b); \
       float32_t __a = (a); \
       float32_t __result; \
       __asm__ ("fmls %s0,%s1,%2.s[%3]" \
                : "=w"(__result) \
                : "w"(__a), "w"(__b), "i"(c) \
                : /* No clobbers */); \
       __result; \
     })
6655 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6656 vget_high_f32 (float32x4_t a)
6658 float32x2_t result;
6659 __asm__ ("ins %0.d[0], %1.d[1]"
6660 : "=w"(result)
6661 : "w"(a)
6662 : /* No clobbers */);
6663 return result;
6666 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6667 vget_high_f64 (float64x2_t a)
6669 float64x1_t result;
6670 __asm__ ("ins %0.d[0], %1.d[1]"
6671 : "=w"(result)
6672 : "w"(a)
6673 : /* No clobbers */);
6674 return result;
6677 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6678 vget_high_p8 (poly8x16_t a)
6680 poly8x8_t result;
6681 __asm__ ("ins %0.d[0], %1.d[1]"
6682 : "=w"(result)
6683 : "w"(a)
6684 : /* No clobbers */);
6685 return result;
6688 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6689 vget_high_p16 (poly16x8_t a)
6691 poly16x4_t result;
6692 __asm__ ("ins %0.d[0], %1.d[1]"
6693 : "=w"(result)
6694 : "w"(a)
6695 : /* No clobbers */);
6696 return result;
6699 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6700 vget_high_s8 (int8x16_t a)
6702 int8x8_t result;
6703 __asm__ ("ins %0.d[0], %1.d[1]"
6704 : "=w"(result)
6705 : "w"(a)
6706 : /* No clobbers */);
6707 return result;
6710 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6711 vget_high_s16 (int16x8_t a)
6713 int16x4_t result;
6714 __asm__ ("ins %0.d[0], %1.d[1]"
6715 : "=w"(result)
6716 : "w"(a)
6717 : /* No clobbers */);
6718 return result;
6721 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6722 vget_high_s32 (int32x4_t a)
6724 int32x2_t result;
6725 __asm__ ("ins %0.d[0], %1.d[1]"
6726 : "=w"(result)
6727 : "w"(a)
6728 : /* No clobbers */);
6729 return result;
6732 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6733 vget_high_s64 (int64x2_t a)
6735 int64x1_t result;
6736 __asm__ ("ins %0.d[0], %1.d[1]"
6737 : "=w"(result)
6738 : "w"(a)
6739 : /* No clobbers */);
6740 return result;
6743 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6744 vget_high_u8 (uint8x16_t a)
6746 uint8x8_t result;
6747 __asm__ ("ins %0.d[0], %1.d[1]"
6748 : "=w"(result)
6749 : "w"(a)
6750 : /* No clobbers */);
6751 return result;
6754 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6755 vget_high_u16 (uint16x8_t a)
6757 uint16x4_t result;
6758 __asm__ ("ins %0.d[0], %1.d[1]"
6759 : "=w"(result)
6760 : "w"(a)
6761 : /* No clobbers */);
6762 return result;
6765 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6766 vget_high_u32 (uint32x4_t a)
6768 uint32x2_t result;
6769 __asm__ ("ins %0.d[0], %1.d[1]"
6770 : "=w"(result)
6771 : "w"(a)
6772 : /* No clobbers */);
6773 return result;
6776 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6777 vget_high_u64 (uint64x2_t a)
6779 uint64x1_t result;
6780 __asm__ ("ins %0.d[0], %1.d[1]"
6781 : "=w"(result)
6782 : "w"(a)
6783 : /* No clobbers */);
6784 return result;
6787 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
6788 vget_low_f32 (float32x4_t a)
6790 float32x2_t result;
6791 __asm__ ("ins %0.d[0], %1.d[0]"
6792 : "=w"(result)
6793 : "w"(a)
6794 : /* No clobbers */);
6795 return result;
6798 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
6799 vget_low_f64 (float64x2_t a)
6801 float64x1_t result;
6802 __asm__ ("ins %0.d[0], %1.d[0]"
6803 : "=w"(result)
6804 : "w"(a)
6805 : /* No clobbers */);
6806 return result;
6809 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
6810 vget_low_p8 (poly8x16_t a)
6812 poly8x8_t result;
6813 __asm__ ("ins %0.d[0], %1.d[0]"
6814 : "=w"(result)
6815 : "w"(a)
6816 : /* No clobbers */);
6817 return result;
6820 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
6821 vget_low_p16 (poly16x8_t a)
6823 poly16x4_t result;
6824 __asm__ ("ins %0.d[0], %1.d[0]"
6825 : "=w"(result)
6826 : "w"(a)
6827 : /* No clobbers */);
6828 return result;
6831 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6832 vget_low_s8 (int8x16_t a)
6834 int8x8_t result;
6835 __asm__ ("ins %0.d[0], %1.d[0]"
6836 : "=w"(result)
6837 : "w"(a)
6838 : /* No clobbers */);
6839 return result;
6842 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6843 vget_low_s16 (int16x8_t a)
6845 int16x4_t result;
6846 __asm__ ("ins %0.d[0], %1.d[0]"
6847 : "=w"(result)
6848 : "w"(a)
6849 : /* No clobbers */);
6850 return result;
6853 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6854 vget_low_s32 (int32x4_t a)
6856 int32x2_t result;
6857 __asm__ ("ins %0.d[0], %1.d[0]"
6858 : "=w"(result)
6859 : "w"(a)
6860 : /* No clobbers */);
6861 return result;
6864 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
6865 vget_low_s64 (int64x2_t a)
6867 int64x1_t result;
6868 __asm__ ("ins %0.d[0], %1.d[0]"
6869 : "=w"(result)
6870 : "w"(a)
6871 : /* No clobbers */);
6872 return result;
6875 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6876 vget_low_u8 (uint8x16_t a)
6878 uint8x8_t result;
6879 __asm__ ("ins %0.d[0], %1.d[0]"
6880 : "=w"(result)
6881 : "w"(a)
6882 : /* No clobbers */);
6883 return result;
6886 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6887 vget_low_u16 (uint16x8_t a)
6889 uint16x4_t result;
6890 __asm__ ("ins %0.d[0], %1.d[0]"
6891 : "=w"(result)
6892 : "w"(a)
6893 : /* No clobbers */);
6894 return result;
6897 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6898 vget_low_u32 (uint32x4_t a)
6900 uint32x2_t result;
6901 __asm__ ("ins %0.d[0], %1.d[0]"
6902 : "=w"(result)
6903 : "w"(a)
6904 : /* No clobbers */);
6905 return result;
6908 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
6909 vget_low_u64 (uint64x2_t a)
6911 uint64x1_t result;
6912 __asm__ ("ins %0.d[0], %1.d[0]"
6913 : "=w"(result)
6914 : "w"(a)
6915 : /* No clobbers */);
6916 return result;
6919 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
6920 vhsub_s8 (int8x8_t a, int8x8_t b)
6922 int8x8_t result;
6923 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
6924 : "=w"(result)
6925 : "w"(a), "w"(b)
6926 : /* No clobbers */);
6927 return result;
6930 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
6931 vhsub_s16 (int16x4_t a, int16x4_t b)
6933 int16x4_t result;
6934 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
6935 : "=w"(result)
6936 : "w"(a), "w"(b)
6937 : /* No clobbers */);
6938 return result;
6941 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
6942 vhsub_s32 (int32x2_t a, int32x2_t b)
6944 int32x2_t result;
6945 __asm__ ("shsub %0.2s, %1.2s, %2.2s"
6946 : "=w"(result)
6947 : "w"(a), "w"(b)
6948 : /* No clobbers */);
6949 return result;
6952 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
6953 vhsub_u8 (uint8x8_t a, uint8x8_t b)
6955 uint8x8_t result;
6956 __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
6957 : "=w"(result)
6958 : "w"(a), "w"(b)
6959 : /* No clobbers */);
6960 return result;
6963 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
6964 vhsub_u16 (uint16x4_t a, uint16x4_t b)
6966 uint16x4_t result;
6967 __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
6968 : "=w"(result)
6969 : "w"(a), "w"(b)
6970 : /* No clobbers */);
6971 return result;
6974 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
6975 vhsub_u32 (uint32x2_t a, uint32x2_t b)
6977 uint32x2_t result;
6978 __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
6979 : "=w"(result)
6980 : "w"(a), "w"(b)
6981 : /* No clobbers */);
6982 return result;
6985 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
6986 vhsubq_s8 (int8x16_t a, int8x16_t b)
6988 int8x16_t result;
6989 __asm__ ("shsub %0.16b, %1.16b, %2.16b"
6990 : "=w"(result)
6991 : "w"(a), "w"(b)
6992 : /* No clobbers */);
6993 return result;
6996 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
6997 vhsubq_s16 (int16x8_t a, int16x8_t b)
6999 int16x8_t result;
7000 __asm__ ("shsub %0.8h, %1.8h, %2.8h"
7001 : "=w"(result)
7002 : "w"(a), "w"(b)
7003 : /* No clobbers */);
7004 return result;
7007 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7008 vhsubq_s32 (int32x4_t a, int32x4_t b)
7010 int32x4_t result;
7011 __asm__ ("shsub %0.4s, %1.4s, %2.4s"
7012 : "=w"(result)
7013 : "w"(a), "w"(b)
7014 : /* No clobbers */);
7015 return result;
7018 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7019 vhsubq_u8 (uint8x16_t a, uint8x16_t b)
7021 uint8x16_t result;
7022 __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
7023 : "=w"(result)
7024 : "w"(a), "w"(b)
7025 : /* No clobbers */);
7026 return result;
7029 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7030 vhsubq_u16 (uint16x8_t a, uint16x8_t b)
7032 uint16x8_t result;
7033 __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
7034 : "=w"(result)
7035 : "w"(a), "w"(b)
7036 : /* No clobbers */);
7037 return result;
7040 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7041 vhsubq_u32 (uint32x4_t a, uint32x4_t b)
7043 uint32x4_t result;
7044 __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
7045 : "=w"(result)
7046 : "w"(a), "w"(b)
7047 : /* No clobbers */);
7048 return result;
7051 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7052 vld1_dup_f32 (const float32_t * a)
7054 float32x2_t result;
7055 __asm__ ("ld1r {%0.2s}, %1"
7056 : "=w"(result)
7057 : "Utv"(*a)
7058 : /* No clobbers */);
7059 return result;
7062 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
7063 vld1_dup_f64 (const float64_t * a)
7065 float64x1_t result;
7066 __asm__ ("ld1r {%0.1d}, %1"
7067 : "=w"(result)
7068 : "Utv"(*a)
7069 : /* No clobbers */);
7070 return result;
7073 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
7074 vld1_dup_p8 (const poly8_t * a)
7076 poly8x8_t result;
7077 __asm__ ("ld1r {%0.8b}, %1"
7078 : "=w"(result)
7079 : "Utv"(*a)
7080 : /* No clobbers */);
7081 return result;
7084 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
7085 vld1_dup_p16 (const poly16_t * a)
7087 poly16x4_t result;
7088 __asm__ ("ld1r {%0.4h}, %1"
7089 : "=w"(result)
7090 : "Utv"(*a)
7091 : /* No clobbers */);
7092 return result;
7095 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7096 vld1_dup_s8 (const int8_t * a)
7098 int8x8_t result;
7099 __asm__ ("ld1r {%0.8b}, %1"
7100 : "=w"(result)
7101 : "Utv"(*a)
7102 : /* No clobbers */);
7103 return result;
7106 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7107 vld1_dup_s16 (const int16_t * a)
7109 int16x4_t result;
7110 __asm__ ("ld1r {%0.4h}, %1"
7111 : "=w"(result)
7112 : "Utv"(*a)
7113 : /* No clobbers */);
7114 return result;
7117 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7118 vld1_dup_s32 (const int32_t * a)
7120 int32x2_t result;
7121 __asm__ ("ld1r {%0.2s}, %1"
7122 : "=w"(result)
7123 : "Utv"(*a)
7124 : /* No clobbers */);
7125 return result;
7128 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
7129 vld1_dup_s64 (const int64_t * a)
7131 int64x1_t result;
7132 __asm__ ("ld1r {%0.1d}, %1"
7133 : "=w"(result)
7134 : "Utv"(*a)
7135 : /* No clobbers */);
7136 return result;
7139 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7140 vld1_dup_u8 (const uint8_t * a)
7142 uint8x8_t result;
7143 __asm__ ("ld1r {%0.8b}, %1"
7144 : "=w"(result)
7145 : "Utv"(*a)
7146 : /* No clobbers */);
7147 return result;
7150 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7151 vld1_dup_u16 (const uint16_t * a)
7153 uint16x4_t result;
7154 __asm__ ("ld1r {%0.4h}, %1"
7155 : "=w"(result)
7156 : "Utv"(*a)
7157 : /* No clobbers */);
7158 return result;
7161 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7162 vld1_dup_u32 (const uint32_t * a)
7164 uint32x2_t result;
7165 __asm__ ("ld1r {%0.2s}, %1"
7166 : "=w"(result)
7167 : "Utv"(*a)
7168 : /* No clobbers */);
7169 return result;
7172 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
7173 vld1_dup_u64 (const uint64_t * a)
7175 uint64x1_t result;
7176 __asm__ ("ld1r {%0.1d}, %1"
7177 : "=w"(result)
7178 : "Utv"(*a)
7179 : /* No clobbers */);
7180 return result;
/* vld1_lane_<type>(a, b, c): load one scalar from *a into lane c of the
   64-bit vector b, leaving the other lanes unchanged (LD1 to a single
   lane).  Implemented as macros so the lane index c can be an "i"
   (immediate) asm operand; "0"(b_) ties the input vector to the output
   register so untouched lanes pass through.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7339 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7340 vld1q_dup_f32 (const float32_t * a)
7342 float32x4_t result;
7343 __asm__ ("ld1r {%0.4s}, %1"
7344 : "=w"(result)
7345 : "Utv"(*a)
7346 : /* No clobbers */);
7347 return result;
7350 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
7351 vld1q_dup_f64 (const float64_t * a)
7353 float64x2_t result;
7354 __asm__ ("ld1r {%0.2d}, %1"
7355 : "=w"(result)
7356 : "Utv"(*a)
7357 : /* No clobbers */);
7358 return result;
7361 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
7362 vld1q_dup_p8 (const poly8_t * a)
7364 poly8x16_t result;
7365 __asm__ ("ld1r {%0.16b}, %1"
7366 : "=w"(result)
7367 : "Utv"(*a)
7368 : /* No clobbers */);
7369 return result;
7372 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
7373 vld1q_dup_p16 (const poly16_t * a)
7375 poly16x8_t result;
7376 __asm__ ("ld1r {%0.8h}, %1"
7377 : "=w"(result)
7378 : "Utv"(*a)
7379 : /* No clobbers */);
7380 return result;
7383 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7384 vld1q_dup_s8 (const int8_t * a)
7386 int8x16_t result;
7387 __asm__ ("ld1r {%0.16b}, %1"
7388 : "=w"(result)
7389 : "Utv"(*a)
7390 : /* No clobbers */);
7391 return result;
7394 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7395 vld1q_dup_s16 (const int16_t * a)
7397 int16x8_t result;
7398 __asm__ ("ld1r {%0.8h}, %1"
7399 : "=w"(result)
7400 : "Utv"(*a)
7401 : /* No clobbers */);
7402 return result;
7405 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7406 vld1q_dup_s32 (const int32_t * a)
7408 int32x4_t result;
7409 __asm__ ("ld1r {%0.4s}, %1"
7410 : "=w"(result)
7411 : "Utv"(*a)
7412 : /* No clobbers */);
7413 return result;
7416 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7417 vld1q_dup_s64 (const int64_t * a)
7419 int64x2_t result;
7420 __asm__ ("ld1r {%0.2d}, %1"
7421 : "=w"(result)
7422 : "Utv"(*a)
7423 : /* No clobbers */);
7424 return result;
7427 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7428 vld1q_dup_u8 (const uint8_t * a)
7430 uint8x16_t result;
7431 __asm__ ("ld1r {%0.16b}, %1"
7432 : "=w"(result)
7433 : "Utv"(*a)
7434 : /* No clobbers */);
7435 return result;
7438 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7439 vld1q_dup_u16 (const uint16_t * a)
7441 uint16x8_t result;
7442 __asm__ ("ld1r {%0.8h}, %1"
7443 : "=w"(result)
7444 : "Utv"(*a)
7445 : /* No clobbers */);
7446 return result;
7449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7450 vld1q_dup_u32 (const uint32_t * a)
7452 uint32x4_t result;
7453 __asm__ ("ld1r {%0.4s}, %1"
7454 : "=w"(result)
7455 : "Utv"(*a)
7456 : /* No clobbers */);
7457 return result;
7460 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7461 vld1q_dup_u64 (const uint64_t * a)
7463 uint64x2_t result;
7464 __asm__ ("ld1r {%0.2d}, %1"
7465 : "=w"(result)
7466 : "Utv"(*a)
7467 : /* No clobbers */);
7468 return result;
/* vld1q_lane_<type>(a, b, c): load one scalar from *a into lane c of
   the 128-bit vector b, preserving the other lanes.  Macros so the lane
   index c is an "i" immediate operand; "0"(b_) ties the input vector to
   the output register.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmla_lane_<t>(a, b, c, d)  : a + b * c[d], 64-bit vectors, lane d
   taken from the 64-bit vector c.
   vmla_laneq_<t>(a, b, c, d): same, but lane d is taken from a 128-bit
   vector c.  The f32 variant expands to a fmul/fadd pair (unfused, so
   rounding matches a separate multiply and add); the integer variants
   map directly to MLA with "0"(a_) tying the accumulator to the
   output.  */

#define vmla_lane_f32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t c_ = (c);                                            \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       float32x2_t t1;                                                  \
       __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fadd %0.2s, %0.2s, %1.2s" \
                : "=w"(result), "=w"(t1)                                \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_lane_s16(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_lane_s32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_lane_u16(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_lane_u32(a, b, c, d)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_laneq_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_laneq_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_laneq_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mla %0.4h, %2.4h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmla_laneq_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("mla %0.2s, %2.2s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7754 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7755 vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7757 float32x2_t result;
7758 float32x2_t t1;
7759 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
7760 : "=w"(result), "=w"(t1)
7761 : "0"(a), "w"(b), "w"(c)
7762 : /* No clobbers */);
7763 return result;
7766 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7767 vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7769 int16x4_t result;
7770 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7771 : "=w"(result)
7772 : "0"(a), "w"(b), "w"(c)
7773 : /* No clobbers */);
7774 return result;
7777 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7778 vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7780 int32x2_t result;
7781 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7782 : "=w"(result)
7783 : "0"(a), "w"(b), "w"(c)
7784 : /* No clobbers */);
7785 return result;
7788 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7789 vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7791 uint16x4_t result;
7792 __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
7793 : "=w"(result)
7794 : "0"(a), "w"(b), "w"(c)
7795 : /* No clobbers */);
7796 return result;
7799 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7800 vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7802 uint32x2_t result;
7803 __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
7804 : "=w"(result)
7805 : "0"(a), "w"(b), "w"(c)
7806 : /* No clobbers */);
7807 return result;
7810 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7811 vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7813 int8x8_t result;
7814 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7815 : "=w"(result)
7816 : "0"(a), "w"(b), "w"(c)
7817 : /* No clobbers */);
7818 return result;
7821 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7822 vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7824 int16x4_t result;
7825 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7826 : "=w"(result)
7827 : "0"(a), "w"(b), "w"(c)
7828 : /* No clobbers */);
7829 return result;
7832 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7833 vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7835 int32x2_t result;
7836 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7837 : "=w"(result)
7838 : "0"(a), "w"(b), "w"(c)
7839 : /* No clobbers */);
7840 return result;
7843 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7844 vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7846 uint8x8_t result;
7847 __asm__ ("mla %0.8b, %2.8b, %3.8b"
7848 : "=w"(result)
7849 : "0"(a), "w"(b), "w"(c)
7850 : /* No clobbers */);
7851 return result;
7854 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7855 vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7857 uint16x4_t result;
7858 __asm__ ("mla %0.4h, %2.4h, %3.4h"
7859 : "=w"(result)
7860 : "0"(a), "w"(b), "w"(c)
7861 : /* No clobbers */);
7862 return result;
7865 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7866 vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7868 uint32x2_t result;
7869 __asm__ ("mla %0.2s, %2.2s, %3.2s"
7870 : "=w"(result)
7871 : "0"(a), "w"(b), "w"(c)
7872 : /* No clobbers */);
7873 return result;
/* vmlal_high_lane(q)_<t>(a, b, c, d): widening multiply-accumulate of
   the HIGH half of b by lane d of c, added to a (SMLAL2/UMLAL2).
   NOTE(review): both the _lane and _laneq variants here take a 128-bit
   c operand, which differs from the ACLE signature for _lane (64-bit
   c) — preserved as-is; confirm against the ACLE before changing.  */

#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7988 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7989 vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7991 int32x4_t result;
7992 __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
7993 : "=w"(result)
7994 : "0"(a), "w"(b), "w"(c)
7995 : /* No clobbers */);
7996 return result;
7999 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8000 vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
8002 int64x2_t result;
8003 __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
8004 : "=w"(result)
8005 : "0"(a), "w"(b), "w"(c)
8006 : /* No clobbers */);
8007 return result;
8010 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8011 vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
8013 uint32x4_t result;
8014 __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
8015 : "=w"(result)
8016 : "0"(a), "w"(b), "w"(c)
8017 : /* No clobbers */);
8018 return result;
8021 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8022 vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
8024 uint64x2_t result;
8025 __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
8026 : "=w"(result)
8027 : "0"(a), "w"(b), "w"(c)
8028 : /* No clobbers */);
8029 return result;
8032 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8033 vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
8035 int16x8_t result;
8036 __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
8037 : "=w"(result)
8038 : "0"(a), "w"(b), "w"(c)
8039 : /* No clobbers */);
8040 return result;
8043 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8044 vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
8046 int32x4_t result;
8047 __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
8048 : "=w"(result)
8049 : "0"(a), "w"(b), "w"(c)
8050 : /* No clobbers */);
8051 return result;
8054 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8055 vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
8057 int64x2_t result;
8058 __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
8059 : "=w"(result)
8060 : "0"(a), "w"(b), "w"(c)
8061 : /* No clobbers */);
8062 return result;
8065 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8066 vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
8068 uint16x8_t result;
8069 __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
8070 : "=w"(result)
8071 : "0"(a), "w"(b), "w"(c)
8072 : /* No clobbers */);
8073 return result;
8076 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8077 vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
8079 uint32x4_t result;
8080 __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
8081 : "=w"(result)
8082 : "0"(a), "w"(b), "w"(c)
8083 : /* No clobbers */);
8084 return result;
8087 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8088 vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
8090 uint64x2_t result;
8091 __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
8092 : "=w"(result)
8093 : "0"(a), "w"(b), "w"(c)
8094 : /* No clobbers */);
8095 return result;
/* vmlal_lane_<t>(a, b, c, d) : widening multiply-accumulate of b by
   lane d of the 64-bit vector c, added to a (SMLAL/UMLAL).
   vmlal_laneq_<t>(a, b, c, d): same, lane d taken from a 128-bit
   vector c.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8210 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8211 vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
8213 int32x4_t result;
8214 __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
8215 : "=w"(result)
8216 : "0"(a), "w"(b), "w"(c)
8217 : /* No clobbers */);
8218 return result;
8221 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8222 vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
8224 int64x2_t result;
8225 __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
8226 : "=w"(result)
8227 : "0"(a), "w"(b), "w"(c)
8228 : /* No clobbers */);
8229 return result;
8232 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8233 vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
8235 uint32x4_t result;
8236 __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
8237 : "=w"(result)
8238 : "0"(a), "w"(b), "w"(c)
8239 : /* No clobbers */);
8240 return result;
8243 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8244 vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
8246 uint64x2_t result;
8247 __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
8248 : "=w"(result)
8249 : "0"(a), "w"(b), "w"(c)
8250 : /* No clobbers */);
8251 return result;
8254 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8255 vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
8257 int16x8_t result;
8258 __asm__ ("smlal %0.8h,%2.8b,%3.8b"
8259 : "=w"(result)
8260 : "0"(a), "w"(b), "w"(c)
8261 : /* No clobbers */);
8262 return result;
8265 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8266 vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
8268 int32x4_t result;
8269 __asm__ ("smlal %0.4s,%2.4h,%3.4h"
8270 : "=w"(result)
8271 : "0"(a), "w"(b), "w"(c)
8272 : /* No clobbers */);
8273 return result;
8276 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8277 vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
8279 int64x2_t result;
8280 __asm__ ("smlal %0.2d,%2.2s,%3.2s"
8281 : "=w"(result)
8282 : "0"(a), "w"(b), "w"(c)
8283 : /* No clobbers */);
8284 return result;
8287 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8288 vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
8290 uint16x8_t result;
8291 __asm__ ("umlal %0.8h,%2.8b,%3.8b"
8292 : "=w"(result)
8293 : "0"(a), "w"(b), "w"(c)
8294 : /* No clobbers */);
8295 return result;
8298 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8299 vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
8301 uint32x4_t result;
8302 __asm__ ("umlal %0.4s,%2.4h,%3.4h"
8303 : "=w"(result)
8304 : "0"(a), "w"(b), "w"(c)
8305 : /* No clobbers */);
8306 return result;
8309 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8310 vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
8312 uint64x2_t result;
8313 __asm__ ("umlal %0.2d,%2.2s,%3.2s"
8314 : "=w"(result)
8315 : "0"(a), "w"(b), "w"(c)
8316 : /* No clobbers */);
8317 return result;
/* vmlaq_lane_<t>(a, b, c, d): a + b * c[d] on 128-bit vectors.
   NOTE(review): these take a 128-bit c, whereas the ACLE _lane (non-q)
   form specifies a 64-bit c — preserved as-is; confirm against the
   ACLE before changing.  The f32 variant uses an unfused fmul/fadd
   pair.  */

#define vmlaq_lane_f32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t c_ = (c);                                            \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       float32x4_t t1;                                                  \
       __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fadd %0.4s, %0.4s, %1.4s" \
                : "=w"(result), "=w"(t1)                                \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlaq_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlaq_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlaq_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlaq_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmlaq_laneq_<t>(a, b, c, d): a + b * c[d] on 128-bit vectors, lane d
   taken from the 128-bit vector c (MLA with lane operand).  */

#define vmlaq_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlaq_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlaq_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("mla %0.8h, %2.8h, %3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlaq_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("mla %0.4s, %2.4s, %3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8447 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8448 vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
8450 float32x4_t result;
8451 float32x4_t t1;
8452 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
8453 : "=w"(result), "=w"(t1)
8454 : "0"(a), "w"(b), "w"(c)
8455 : /* No clobbers */);
8456 return result;
8459 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8460 vmlaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
8462 float64x2_t result;
8463 float64x2_t t1;
8464 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fadd %0.2d, %0.2d, %1.2d"
8465 : "=w"(result), "=w"(t1)
8466 : "0"(a), "w"(b), "w"(c)
8467 : /* No clobbers */);
8468 return result;
8471 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8472 vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
8474 int16x8_t result;
8475 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
8476 : "=w"(result)
8477 : "0"(a), "w"(b), "w"(c)
8478 : /* No clobbers */);
8479 return result;
8482 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8483 vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
8485 int32x4_t result;
8486 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
8487 : "=w"(result)
8488 : "0"(a), "w"(b), "w"(c)
8489 : /* No clobbers */);
8490 return result;
8493 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8494 vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
8496 uint16x8_t result;
8497 __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
8498 : "=w"(result)
8499 : "0"(a), "w"(b), "w"(c)
8500 : /* No clobbers */);
8501 return result;
8504 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8505 vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
8507 uint32x4_t result;
8508 __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
8509 : "=w"(result)
8510 : "0"(a), "w"(b), "w"(c)
8511 : /* No clobbers */);
8512 return result;
8515 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8516 vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
8518 int8x16_t result;
8519 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8520 : "=w"(result)
8521 : "0"(a), "w"(b), "w"(c)
8522 : /* No clobbers */);
8523 return result;
8526 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8527 vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
8529 int16x8_t result;
8530 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8531 : "=w"(result)
8532 : "0"(a), "w"(b), "w"(c)
8533 : /* No clobbers */);
8534 return result;
8537 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8538 vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
8540 int32x4_t result;
8541 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8542 : "=w"(result)
8543 : "0"(a), "w"(b), "w"(c)
8544 : /* No clobbers */);
8545 return result;
8548 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8549 vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
8551 uint8x16_t result;
8552 __asm__ ("mla %0.16b, %2.16b, %3.16b"
8553 : "=w"(result)
8554 : "0"(a), "w"(b), "w"(c)
8555 : /* No clobbers */);
8556 return result;
8559 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8560 vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
8562 uint16x8_t result;
8563 __asm__ ("mla %0.8h, %2.8h, %3.8h"
8564 : "=w"(result)
8565 : "0"(a), "w"(b), "w"(c)
8566 : /* No clobbers */);
8567 return result;
8570 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8571 vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
8573 uint32x4_t result;
8574 __asm__ ("mla %0.4s, %2.4s, %3.4s"
8575 : "=w"(result)
8576 : "0"(a), "w"(b), "w"(c)
8577 : /* No clobbers */);
8578 return result;
8581 #define vmls_lane_f32(a, b, c, d) \
8582 __extension__ \
8583 ({ \
8584 float32x2_t c_ = (c); \
8585 float32x2_t b_ = (b); \
8586 float32x2_t a_ = (a); \
8587 float32x2_t result; \
8588 float32x2_t t1; \
8589 __asm__ ("fmul %1.2s, %3.2s, %4.s[%5]; fsub %0.2s, %0.2s, %1.2s" \
8590 : "=w"(result), "=w"(t1) \
8591 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8592 : /* No clobbers */); \
8593 result; \
8596 #define vmls_lane_s16(a, b, c, d) \
8597 __extension__ \
8598 ({ \
8599 int16x4_t c_ = (c); \
8600 int16x4_t b_ = (b); \
8601 int16x4_t a_ = (a); \
8602 int16x4_t result; \
8603 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
8604 : "=w"(result) \
8605 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8606 : /* No clobbers */); \
8607 result; \
8610 #define vmls_lane_s32(a, b, c, d) \
8611 __extension__ \
8612 ({ \
8613 int32x2_t c_ = (c); \
8614 int32x2_t b_ = (b); \
8615 int32x2_t a_ = (a); \
8616 int32x2_t result; \
8617 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
8618 : "=w"(result) \
8619 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8620 : /* No clobbers */); \
8621 result; \
8624 #define vmls_lane_u16(a, b, c, d) \
8625 __extension__ \
8626 ({ \
8627 uint16x4_t c_ = (c); \
8628 uint16x4_t b_ = (b); \
8629 uint16x4_t a_ = (a); \
8630 uint16x4_t result; \
8631 __asm__ ("mls %0.4h,%2.4h,%3.h[%4]" \
8632 : "=w"(result) \
8633 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8634 : /* No clobbers */); \
8635 result; \
8638 #define vmls_lane_u32(a, b, c, d) \
8639 __extension__ \
8640 ({ \
8641 uint32x2_t c_ = (c); \
8642 uint32x2_t b_ = (b); \
8643 uint32x2_t a_ = (a); \
8644 uint32x2_t result; \
8645 __asm__ ("mls %0.2s,%2.2s,%3.s[%4]" \
8646 : "=w"(result) \
8647 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8648 : /* No clobbers */); \
8649 result; \
8652 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8653 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
8655 float32x2_t result;
8656 float32x2_t t1;
8657 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
8658 : "=w"(result), "=w"(t1)
8659 : "0"(a), "w"(b), "w"(c)
8660 : /* No clobbers */);
8661 return result;
8664 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8665 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
8667 int16x4_t result;
8668 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8669 : "=w"(result)
8670 : "0"(a), "w"(b), "w"(c)
8671 : /* No clobbers */);
8672 return result;
8675 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8676 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
8678 int32x2_t result;
8679 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8680 : "=w"(result)
8681 : "0"(a), "w"(b), "w"(c)
8682 : /* No clobbers */);
8683 return result;
8686 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8687 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
8689 uint16x4_t result;
8690 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
8691 : "=w"(result)
8692 : "0"(a), "w"(b), "w"(c)
8693 : /* No clobbers */);
8694 return result;
8697 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8698 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
8700 uint32x2_t result;
8701 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
8702 : "=w"(result)
8703 : "0"(a), "w"(b), "w"(c)
8704 : /* No clobbers */);
8705 return result;
8708 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8709 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
8711 int8x8_t result;
8712 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8713 : "=w"(result)
8714 : "0"(a), "w"(b), "w"(c)
8715 : /* No clobbers */);
8716 return result;
8719 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8720 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
8722 int16x4_t result;
8723 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8724 : "=w"(result)
8725 : "0"(a), "w"(b), "w"(c)
8726 : /* No clobbers */);
8727 return result;
8730 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8731 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
8733 int32x2_t result;
8734 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8735 : "=w"(result)
8736 : "0"(a), "w"(b), "w"(c)
8737 : /* No clobbers */);
8738 return result;
8741 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8742 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
8744 uint8x8_t result;
8745 __asm__ ("mls %0.8b,%2.8b,%3.8b"
8746 : "=w"(result)
8747 : "0"(a), "w"(b), "w"(c)
8748 : /* No clobbers */);
8749 return result;
8752 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8753 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
8755 uint16x4_t result;
8756 __asm__ ("mls %0.4h,%2.4h,%3.4h"
8757 : "=w"(result)
8758 : "0"(a), "w"(b), "w"(c)
8759 : /* No clobbers */);
8760 return result;
8763 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8764 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
8766 uint32x2_t result;
8767 __asm__ ("mls %0.2s,%2.2s,%3.2s"
8768 : "=w"(result)
8769 : "0"(a), "w"(b), "w"(c)
8770 : /* No clobbers */);
8771 return result;
8774 #define vmlsl_high_lane_s16(a, b, c, d) \
8775 __extension__ \
8776 ({ \
8777 int16x8_t c_ = (c); \
8778 int16x8_t b_ = (b); \
8779 int32x4_t a_ = (a); \
8780 int32x4_t result; \
8781 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
8782 : "=w"(result) \
8783 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8784 : /* No clobbers */); \
8785 result; \
8788 #define vmlsl_high_lane_s32(a, b, c, d) \
8789 __extension__ \
8790 ({ \
8791 int32x4_t c_ = (c); \
8792 int32x4_t b_ = (b); \
8793 int64x2_t a_ = (a); \
8794 int64x2_t result; \
8795 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
8796 : "=w"(result) \
8797 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8798 : /* No clobbers */); \
8799 result; \
8802 #define vmlsl_high_lane_u16(a, b, c, d) \
8803 __extension__ \
8804 ({ \
8805 uint16x8_t c_ = (c); \
8806 uint16x8_t b_ = (b); \
8807 uint32x4_t a_ = (a); \
8808 uint32x4_t result; \
8809 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
8810 : "=w"(result) \
8811 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8812 : /* No clobbers */); \
8813 result; \
8816 #define vmlsl_high_lane_u32(a, b, c, d) \
8817 __extension__ \
8818 ({ \
8819 uint32x4_t c_ = (c); \
8820 uint32x4_t b_ = (b); \
8821 uint64x2_t a_ = (a); \
8822 uint64x2_t result; \
8823 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
8824 : "=w"(result) \
8825 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8826 : /* No clobbers */); \
8827 result; \
8830 #define vmlsl_high_laneq_s16(a, b, c, d) \
8831 __extension__ \
8832 ({ \
8833 int16x8_t c_ = (c); \
8834 int16x8_t b_ = (b); \
8835 int32x4_t a_ = (a); \
8836 int32x4_t result; \
8837 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \
8838 : "=w"(result) \
8839 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8840 : /* No clobbers */); \
8841 result; \
8844 #define vmlsl_high_laneq_s32(a, b, c, d) \
8845 __extension__ \
8846 ({ \
8847 int32x4_t c_ = (c); \
8848 int32x4_t b_ = (b); \
8849 int64x2_t a_ = (a); \
8850 int64x2_t result; \
8851 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \
8852 : "=w"(result) \
8853 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8854 : /* No clobbers */); \
8855 result; \
8858 #define vmlsl_high_laneq_u16(a, b, c, d) \
8859 __extension__ \
8860 ({ \
8861 uint16x8_t c_ = (c); \
8862 uint16x8_t b_ = (b); \
8863 uint32x4_t a_ = (a); \
8864 uint32x4_t result; \
8865 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \
8866 : "=w"(result) \
8867 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8868 : /* No clobbers */); \
8869 result; \
8872 #define vmlsl_high_laneq_u32(a, b, c, d) \
8873 __extension__ \
8874 ({ \
8875 uint32x4_t c_ = (c); \
8876 uint32x4_t b_ = (b); \
8877 uint64x2_t a_ = (a); \
8878 uint64x2_t result; \
8879 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \
8880 : "=w"(result) \
8881 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
8882 : /* No clobbers */); \
8883 result; \
8886 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8887 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
8889 int32x4_t result;
8890 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
8891 : "=w"(result)
8892 : "0"(a), "w"(b), "w"(c)
8893 : /* No clobbers */);
8894 return result;
8897 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8898 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
8900 int64x2_t result;
8901 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
8902 : "=w"(result)
8903 : "0"(a), "w"(b), "w"(c)
8904 : /* No clobbers */);
8905 return result;
8908 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8909 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
8911 uint32x4_t result;
8912 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
8913 : "=w"(result)
8914 : "0"(a), "w"(b), "w"(c)
8915 : /* No clobbers */);
8916 return result;
8919 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8920 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
8922 uint64x2_t result;
8923 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
8924 : "=w"(result)
8925 : "0"(a), "w"(b), "w"(c)
8926 : /* No clobbers */);
8927 return result;
8930 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8931 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
8933 int16x8_t result;
8934 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
8935 : "=w"(result)
8936 : "0"(a), "w"(b), "w"(c)
8937 : /* No clobbers */);
8938 return result;
8941 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8942 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
8944 int32x4_t result;
8945 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
8946 : "=w"(result)
8947 : "0"(a), "w"(b), "w"(c)
8948 : /* No clobbers */);
8949 return result;
8952 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8953 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
8955 int64x2_t result;
8956 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
8957 : "=w"(result)
8958 : "0"(a), "w"(b), "w"(c)
8959 : /* No clobbers */);
8960 return result;
8963 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8964 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
8966 uint16x8_t result;
8967 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
8968 : "=w"(result)
8969 : "0"(a), "w"(b), "w"(c)
8970 : /* No clobbers */);
8971 return result;
8974 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8975 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
8977 uint32x4_t result;
8978 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
8979 : "=w"(result)
8980 : "0"(a), "w"(b), "w"(c)
8981 : /* No clobbers */);
8982 return result;
8985 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8986 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
8988 uint64x2_t result;
8989 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
8990 : "=w"(result)
8991 : "0"(a), "w"(b), "w"(c)
8992 : /* No clobbers */);
8993 return result;
8996 #define vmlsl_lane_s16(a, b, c, d) \
8997 __extension__ \
8998 ({ \
8999 int16x4_t c_ = (c); \
9000 int16x4_t b_ = (b); \
9001 int32x4_t a_ = (a); \
9002 int32x4_t result; \
9003 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
9004 : "=w"(result) \
9005 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9006 : /* No clobbers */); \
9007 result; \
9010 #define vmlsl_lane_s32(a, b, c, d) \
9011 __extension__ \
9012 ({ \
9013 int32x2_t c_ = (c); \
9014 int32x2_t b_ = (b); \
9015 int64x2_t a_ = (a); \
9016 int64x2_t result; \
9017 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
9018 : "=w"(result) \
9019 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9020 : /* No clobbers */); \
9021 result; \
9024 #define vmlsl_lane_u16(a, b, c, d) \
9025 __extension__ \
9026 ({ \
9027 uint16x4_t c_ = (c); \
9028 uint16x4_t b_ = (b); \
9029 uint32x4_t a_ = (a); \
9030 uint32x4_t result; \
9031 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
9032 : "=w"(result) \
9033 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9034 : /* No clobbers */); \
9035 result; \
9038 #define vmlsl_lane_u32(a, b, c, d) \
9039 __extension__ \
9040 ({ \
9041 uint32x2_t c_ = (c); \
9042 uint32x2_t b_ = (b); \
9043 uint64x2_t a_ = (a); \
9044 uint64x2_t result; \
9045 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
9046 : "=w"(result) \
9047 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9048 : /* No clobbers */); \
9049 result; \
9052 #define vmlsl_laneq_s16(a, b, c, d) \
9053 __extension__ \
9054 ({ \
9055 int16x8_t c_ = (c); \
9056 int16x4_t b_ = (b); \
9057 int32x4_t a_ = (a); \
9058 int32x4_t result; \
9059 __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \
9060 : "=w"(result) \
9061 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9062 : /* No clobbers */); \
9063 result; \
9066 #define vmlsl_laneq_s32(a, b, c, d) \
9067 __extension__ \
9068 ({ \
9069 int32x4_t c_ = (c); \
9070 int32x2_t b_ = (b); \
9071 int64x2_t a_ = (a); \
9072 int64x2_t result; \
9073 __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \
9074 : "=w"(result) \
9075 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9076 : /* No clobbers */); \
9077 result; \
9080 #define vmlsl_laneq_u16(a, b, c, d) \
9081 __extension__ \
9082 ({ \
9083 uint16x8_t c_ = (c); \
9084 uint16x4_t b_ = (b); \
9085 uint32x4_t a_ = (a); \
9086 uint32x4_t result; \
9087 __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \
9088 : "=w"(result) \
9089 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9090 : /* No clobbers */); \
9091 result; \
9094 #define vmlsl_laneq_u32(a, b, c, d) \
9095 __extension__ \
9096 ({ \
9097 uint32x4_t c_ = (c); \
9098 uint32x2_t b_ = (b); \
9099 uint64x2_t a_ = (a); \
9100 uint64x2_t result; \
9101 __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \
9102 : "=w"(result) \
9103 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9104 : /* No clobbers */); \
9105 result; \
9108 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9109 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
9111 int32x4_t result;
9112 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
9113 : "=w"(result)
9114 : "0"(a), "w"(b), "w"(c)
9115 : /* No clobbers */);
9116 return result;
9119 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9120 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
9122 int64x2_t result;
9123 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
9124 : "=w"(result)
9125 : "0"(a), "w"(b), "w"(c)
9126 : /* No clobbers */);
9127 return result;
9130 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9131 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
9133 uint32x4_t result;
9134 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
9135 : "=w"(result)
9136 : "0"(a), "w"(b), "w"(c)
9137 : /* No clobbers */);
9138 return result;
9141 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9142 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
9144 uint64x2_t result;
9145 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
9146 : "=w"(result)
9147 : "0"(a), "w"(b), "w"(c)
9148 : /* No clobbers */);
9149 return result;
9152 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9153 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
9155 int16x8_t result;
9156 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
9157 : "=w"(result)
9158 : "0"(a), "w"(b), "w"(c)
9159 : /* No clobbers */);
9160 return result;
9163 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9164 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
9166 int32x4_t result;
9167 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
9168 : "=w"(result)
9169 : "0"(a), "w"(b), "w"(c)
9170 : /* No clobbers */);
9171 return result;
9174 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9175 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
9177 int64x2_t result;
9178 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
9179 : "=w"(result)
9180 : "0"(a), "w"(b), "w"(c)
9181 : /* No clobbers */);
9182 return result;
9185 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9186 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
9188 uint16x8_t result;
9189 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
9190 : "=w"(result)
9191 : "0"(a), "w"(b), "w"(c)
9192 : /* No clobbers */);
9193 return result;
9196 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9197 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
9199 uint32x4_t result;
9200 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
9201 : "=w"(result)
9202 : "0"(a), "w"(b), "w"(c)
9203 : /* No clobbers */);
9204 return result;
9207 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9208 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
9210 uint64x2_t result;
9211 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
9212 : "=w"(result)
9213 : "0"(a), "w"(b), "w"(c)
9214 : /* No clobbers */);
9215 return result;
9218 #define vmlsq_lane_f32(a, b, c, d) \
9219 __extension__ \
9220 ({ \
9221 float32x4_t c_ = (c); \
9222 float32x4_t b_ = (b); \
9223 float32x4_t a_ = (a); \
9224 float32x4_t result; \
9225 float32x4_t t1; \
9226 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
9227 : "=w"(result), "=w"(t1) \
9228 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9229 : /* No clobbers */); \
9230 result; \
9233 #define vmlsq_lane_s16(a, b, c, d) \
9234 __extension__ \
9235 ({ \
9236 int16x8_t c_ = (c); \
9237 int16x8_t b_ = (b); \
9238 int16x8_t a_ = (a); \
9239 int16x8_t result; \
9240 __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
9241 : "=w"(result) \
9242 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9243 : /* No clobbers */); \
9244 result; \
9247 #define vmlsq_lane_s32(a, b, c, d) \
9248 __extension__ \
9249 ({ \
9250 int32x4_t c_ = (c); \
9251 int32x4_t b_ = (b); \
9252 int32x4_t a_ = (a); \
9253 int32x4_t result; \
9254 __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
9255 : "=w"(result) \
9256 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9257 : /* No clobbers */); \
9258 result; \
9261 #define vmlsq_lane_u16(a, b, c, d) \
9262 __extension__ \
9263 ({ \
9264 uint16x8_t c_ = (c); \
9265 uint16x8_t b_ = (b); \
9266 uint16x8_t a_ = (a); \
9267 uint16x8_t result; \
9268 __asm__ ("mls %0.8h,%2.8h,%3.h[%4]" \
9269 : "=w"(result) \
9270 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9271 : /* No clobbers */); \
9272 result; \
9275 #define vmlsq_lane_u32(a, b, c, d) \
9276 __extension__ \
9277 ({ \
9278 uint32x4_t c_ = (c); \
9279 uint32x4_t b_ = (b); \
9280 uint32x4_t a_ = (a); \
9281 uint32x4_t result; \
9282 __asm__ ("mls %0.4s,%2.4s,%3.s[%4]" \
9283 : "=w"(result) \
9284 : "0"(a_), "w"(b_), "w"(c_), "i"(d) \
9285 : /* No clobbers */); \
9286 result; \
9289 #define vmlsq_laneq_f32(__a, __b, __c, __d) \
9290 __extension__ \
9291 ({ \
9292 float32x4_t __c_ = (__c); \
9293 float32x4_t __b_ = (__b); \
9294 float32x4_t __a_ = (__a); \
9295 float32x4_t __result; \
9296 float32x4_t __t1; \
9297 __asm__ ("fmul %1.4s, %3.4s, %4.s[%5]; fsub %0.4s, %0.4s, %1.4s" \
9298 : "=w"(__result), "=w"(__t1) \
9299 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9300 : /* No clobbers */); \
9301 __result; \
9304 #define vmlsq_laneq_s16(__a, __b, __c, __d) \
9305 __extension__ \
9306 ({ \
9307 int16x8_t __c_ = (__c); \
9308 int16x8_t __b_ = (__b); \
9309 int16x8_t __a_ = (__a); \
9310 int16x8_t __result; \
9311 __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
9312 : "=w"(__result) \
9313 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9314 : /* No clobbers */); \
9315 __result; \
9318 #define vmlsq_laneq_s32(__a, __b, __c, __d) \
9319 __extension__ \
9320 ({ \
9321 int32x4_t __c_ = (__c); \
9322 int32x4_t __b_ = (__b); \
9323 int32x4_t __a_ = (__a); \
9324 int32x4_t __result; \
9325 __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
9326 : "=w"(__result) \
9327 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9328 : /* No clobbers */); \
9329 __result; \
9332 #define vmlsq_laneq_u16(__a, __b, __c, __d) \
9333 __extension__ \
9334 ({ \
9335 uint16x8_t __c_ = (__c); \
9336 uint16x8_t __b_ = (__b); \
9337 uint16x8_t __a_ = (__a); \
9338 uint16x8_t __result; \
9339 __asm__ ("mls %0.8h, %2.8h, %3.h[%4]" \
9340 : "=w"(__result) \
9341 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9342 : /* No clobbers */); \
9343 __result; \
9346 #define vmlsq_laneq_u32(__a, __b, __c, __d) \
9347 __extension__ \
9348 ({ \
9349 uint32x4_t __c_ = (__c); \
9350 uint32x4_t __b_ = (__b); \
9351 uint32x4_t __a_ = (__a); \
9352 uint32x4_t __result; \
9353 __asm__ ("mls %0.4s, %2.4s, %3.s[%4]" \
9354 : "=w"(__result) \
9355 : "0"(__a_), "w"(__b_), "w"(__c_), "i"(__d) \
9356 : /* No clobbers */); \
9357 __result; \
9360 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9361 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
9363 float32x4_t result;
9364 float32x4_t t1;
9365 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
9366 : "=w"(result), "=w"(t1)
9367 : "0"(a), "w"(b), "w"(c)
9368 : /* No clobbers */);
9369 return result;
9372 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9373 vmlsq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
9375 float64x2_t result;
9376 float64x2_t t1;
9377 __asm__ ("fmul %1.2d, %3.2d, %4.d[0]; fsub %0.2d, %0.2d, %1.2d"
9378 : "=w"(result), "=w"(t1)
9379 : "0"(a), "w"(b), "w"(c)
9380 : /* No clobbers */);
9381 return result;
9384 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9385 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
9387 int16x8_t result;
9388 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
9389 : "=w"(result)
9390 : "0"(a), "w"(b), "w"(c)
9391 : /* No clobbers */);
9392 return result;
9395 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9396 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
9398 int32x4_t result;
9399 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
9400 : "=w"(result)
9401 : "0"(a), "w"(b), "w"(c)
9402 : /* No clobbers */);
9403 return result;
9406 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9407 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
9409 uint16x8_t result;
9410 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
9411 : "=w"(result)
9412 : "0"(a), "w"(b), "w"(c)
9413 : /* No clobbers */);
9414 return result;
9417 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9418 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
9420 uint32x4_t result;
9421 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
9422 : "=w"(result)
9423 : "0"(a), "w"(b), "w"(c)
9424 : /* No clobbers */);
9425 return result;
9428 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9429 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
9431 int8x16_t result;
9432 __asm__ ("mls %0.16b,%2.16b,%3.16b"
9433 : "=w"(result)
9434 : "0"(a), "w"(b), "w"(c)
9435 : /* No clobbers */);
9436 return result;
9439 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9440 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
9442 int16x8_t result;
9443 __asm__ ("mls %0.8h,%2.8h,%3.8h"
9444 : "=w"(result)
9445 : "0"(a), "w"(b), "w"(c)
9446 : /* No clobbers */);
9447 return result;
9450 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9451 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
9453 int32x4_t result;
9454 __asm__ ("mls %0.4s,%2.4s,%3.4s"
9455 : "=w"(result)
9456 : "0"(a), "w"(b), "w"(c)
9457 : /* No clobbers */);
9458 return result;
9461 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9462 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
9464 uint8x16_t result;
9465 __asm__ ("mls %0.16b,%2.16b,%3.16b"
9466 : "=w"(result)
9467 : "0"(a), "w"(b), "w"(c)
9468 : /* No clobbers */);
9469 return result;
9472 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9473 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
9475 uint16x8_t result;
9476 __asm__ ("mls %0.8h,%2.8h,%3.8h"
9477 : "=w"(result)
9478 : "0"(a), "w"(b), "w"(c)
9479 : /* No clobbers */);
9480 return result;
9483 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9484 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
9486 uint32x4_t result;
9487 __asm__ ("mls %0.4s,%2.4s,%3.4s"
9488 : "=w"(result)
9489 : "0"(a), "w"(b), "w"(c)
9490 : /* No clobbers */);
9491 return result;
9494 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9495 vmov_n_f32 (float32_t a)
9497 float32x2_t result;
9498 __asm__ ("dup %0.2s, %w1"
9499 : "=w"(result)
9500 : "r"(a)
9501 : /* No clobbers */);
9502 return result;
9505 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
9506 vmov_n_p8 (uint32_t a)
9508 poly8x8_t result;
9509 __asm__ ("dup %0.8b,%w1"
9510 : "=w"(result)
9511 : "r"(a)
9512 : /* No clobbers */);
9513 return result;
9516 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
9517 vmov_n_p16 (uint32_t a)
9519 poly16x4_t result;
9520 __asm__ ("dup %0.4h,%w1"
9521 : "=w"(result)
9522 : "r"(a)
9523 : /* No clobbers */);
9524 return result;
9527 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9528 vmov_n_s8 (int32_t a)
9530 int8x8_t result;
9531 __asm__ ("dup %0.8b,%w1"
9532 : "=w"(result)
9533 : "r"(a)
9534 : /* No clobbers */);
9535 return result;
9538 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9539 vmov_n_s16 (int32_t a)
9541 int16x4_t result;
9542 __asm__ ("dup %0.4h,%w1"
9543 : "=w"(result)
9544 : "r"(a)
9545 : /* No clobbers */);
9546 return result;
9549 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9550 vmov_n_s32 (int32_t a)
9552 int32x2_t result;
9553 __asm__ ("dup %0.2s,%w1"
9554 : "=w"(result)
9555 : "r"(a)
9556 : /* No clobbers */);
9557 return result;
9560 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9561 vmov_n_s64 (int64_t a)
9563 int64x1_t result;
9564 __asm__ ("ins %0.d[0],%x1"
9565 : "=w"(result)
9566 : "r"(a)
9567 : /* No clobbers */);
9568 return result;
9571 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9572 vmov_n_u8 (uint32_t a)
9574 uint8x8_t result;
9575 __asm__ ("dup %0.8b,%w1"
9576 : "=w"(result)
9577 : "r"(a)
9578 : /* No clobbers */);
9579 return result;
9582 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9583 vmov_n_u16 (uint32_t a)
9585 uint16x4_t result;
9586 __asm__ ("dup %0.4h,%w1"
9587 : "=w"(result)
9588 : "r"(a)
9589 : /* No clobbers */);
9590 return result;
9593 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9594 vmov_n_u32 (uint32_t a)
9596 uint32x2_t result;
9597 __asm__ ("dup %0.2s,%w1"
9598 : "=w"(result)
9599 : "r"(a)
9600 : /* No clobbers */);
9601 return result;
9604 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9605 vmov_n_u64 (uint64_t a)
9607 uint64x1_t result;
9608 __asm__ ("ins %0.d[0],%x1"
9609 : "=w"(result)
9610 : "r"(a)
9611 : /* No clobbers */);
9612 return result;
9615 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9616 vmovl_high_s8 (int8x16_t a)
9618 int16x8_t result;
9619 __asm__ ("sshll2 %0.8h,%1.16b,#0"
9620 : "=w"(result)
9621 : "w"(a)
9622 : /* No clobbers */);
9623 return result;
9626 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9627 vmovl_high_s16 (int16x8_t a)
9629 int32x4_t result;
9630 __asm__ ("sshll2 %0.4s,%1.8h,#0"
9631 : "=w"(result)
9632 : "w"(a)
9633 : /* No clobbers */);
9634 return result;
9637 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9638 vmovl_high_s32 (int32x4_t a)
9640 int64x2_t result;
9641 __asm__ ("sshll2 %0.2d,%1.4s,#0"
9642 : "=w"(result)
9643 : "w"(a)
9644 : /* No clobbers */);
9645 return result;
9648 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9649 vmovl_high_u8 (uint8x16_t a)
9651 uint16x8_t result;
9652 __asm__ ("ushll2 %0.8h,%1.16b,#0"
9653 : "=w"(result)
9654 : "w"(a)
9655 : /* No clobbers */);
9656 return result;
9659 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9660 vmovl_high_u16 (uint16x8_t a)
9662 uint32x4_t result;
9663 __asm__ ("ushll2 %0.4s,%1.8h,#0"
9664 : "=w"(result)
9665 : "w"(a)
9666 : /* No clobbers */);
9667 return result;
9670 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9671 vmovl_high_u32 (uint32x4_t a)
9673 uint64x2_t result;
9674 __asm__ ("ushll2 %0.2d,%1.4s,#0"
9675 : "=w"(result)
9676 : "w"(a)
9677 : /* No clobbers */);
9678 return result;
9681 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9682 vmovl_s8 (int8x8_t a)
9684 int16x8_t result;
9685 __asm__ ("sshll %0.8h,%1.8b,#0"
9686 : "=w"(result)
9687 : "w"(a)
9688 : /* No clobbers */);
9689 return result;
9692 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9693 vmovl_s16 (int16x4_t a)
9695 int32x4_t result;
9696 __asm__ ("sshll %0.4s,%1.4h,#0"
9697 : "=w"(result)
9698 : "w"(a)
9699 : /* No clobbers */);
9700 return result;
9703 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9704 vmovl_s32 (int32x2_t a)
9706 int64x2_t result;
9707 __asm__ ("sshll %0.2d,%1.2s,#0"
9708 : "=w"(result)
9709 : "w"(a)
9710 : /* No clobbers */);
9711 return result;
9714 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9715 vmovl_u8 (uint8x8_t a)
9717 uint16x8_t result;
9718 __asm__ ("ushll %0.8h,%1.8b,#0"
9719 : "=w"(result)
9720 : "w"(a)
9721 : /* No clobbers */);
9722 return result;
9725 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9726 vmovl_u16 (uint16x4_t a)
9728 uint32x4_t result;
9729 __asm__ ("ushll %0.4s,%1.4h,#0"
9730 : "=w"(result)
9731 : "w"(a)
9732 : /* No clobbers */);
9733 return result;
9736 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9737 vmovl_u32 (uint32x2_t a)
9739 uint64x2_t result;
9740 __asm__ ("ushll %0.2d,%1.2s,#0"
9741 : "=w"(result)
9742 : "w"(a)
9743 : /* No clobbers */);
9744 return result;
9747 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9748 vmovn_high_s16 (int8x8_t a, int16x8_t b)
9750 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
9751 __asm__ ("xtn2 %0.16b,%1.8h"
9752 : "+w"(result)
9753 : "w"(b)
9754 : /* No clobbers */);
9755 return result;
9758 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9759 vmovn_high_s32 (int16x4_t a, int32x4_t b)
9761 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
9762 __asm__ ("xtn2 %0.8h,%1.4s"
9763 : "+w"(result)
9764 : "w"(b)
9765 : /* No clobbers */);
9766 return result;
9769 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9770 vmovn_high_s64 (int32x2_t a, int64x2_t b)
9772 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
9773 __asm__ ("xtn2 %0.4s,%1.2d"
9774 : "+w"(result)
9775 : "w"(b)
9776 : /* No clobbers */);
9777 return result;
9780 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9781 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
9783 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
9784 __asm__ ("xtn2 %0.16b,%1.8h"
9785 : "+w"(result)
9786 : "w"(b)
9787 : /* No clobbers */);
9788 return result;
9791 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9792 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
9794 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
9795 __asm__ ("xtn2 %0.8h,%1.4s"
9796 : "+w"(result)
9797 : "w"(b)
9798 : /* No clobbers */);
9799 return result;
9802 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9803 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
9805 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
9806 __asm__ ("xtn2 %0.4s,%1.2d"
9807 : "+w"(result)
9808 : "w"(b)
9809 : /* No clobbers */);
9810 return result;
9813 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9814 vmovn_s16 (int16x8_t a)
9816 int8x8_t result;
9817 __asm__ ("xtn %0.8b,%1.8h"
9818 : "=w"(result)
9819 : "w"(a)
9820 : /* No clobbers */);
9821 return result;
9824 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9825 vmovn_s32 (int32x4_t a)
9827 int16x4_t result;
9828 __asm__ ("xtn %0.4h,%1.4s"
9829 : "=w"(result)
9830 : "w"(a)
9831 : /* No clobbers */);
9832 return result;
9835 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9836 vmovn_s64 (int64x2_t a)
9838 int32x2_t result;
9839 __asm__ ("xtn %0.2s,%1.2d"
9840 : "=w"(result)
9841 : "w"(a)
9842 : /* No clobbers */);
9843 return result;
9846 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9847 vmovn_u16 (uint16x8_t a)
9849 uint8x8_t result;
9850 __asm__ ("xtn %0.8b,%1.8h"
9851 : "=w"(result)
9852 : "w"(a)
9853 : /* No clobbers */);
9854 return result;
9857 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9858 vmovn_u32 (uint32x4_t a)
9860 uint16x4_t result;
9861 __asm__ ("xtn %0.4h,%1.4s"
9862 : "=w"(result)
9863 : "w"(a)
9864 : /* No clobbers */);
9865 return result;
9868 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9869 vmovn_u64 (uint64x2_t a)
9871 uint32x2_t result;
9872 __asm__ ("xtn %0.2s,%1.2d"
9873 : "=w"(result)
9874 : "w"(a)
9875 : /* No clobbers */);
9876 return result;
9879 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9880 vmovq_n_f32 (float32_t a)
9882 float32x4_t result;
9883 __asm__ ("dup %0.4s, %w1"
9884 : "=w"(result)
9885 : "r"(a)
9886 : /* No clobbers */);
9887 return result;
9890 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9891 vmovq_n_f64 (float64_t a)
9893 return (float64x2_t) {a, a};
9896 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
9897 vmovq_n_p8 (uint32_t a)
9899 poly8x16_t result;
9900 __asm__ ("dup %0.16b,%w1"
9901 : "=w"(result)
9902 : "r"(a)
9903 : /* No clobbers */);
9904 return result;
9907 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
9908 vmovq_n_p16 (uint32_t a)
9910 poly16x8_t result;
9911 __asm__ ("dup %0.8h,%w1"
9912 : "=w"(result)
9913 : "r"(a)
9914 : /* No clobbers */);
9915 return result;
9918 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9919 vmovq_n_s8 (int32_t a)
9921 int8x16_t result;
9922 __asm__ ("dup %0.16b,%w1"
9923 : "=w"(result)
9924 : "r"(a)
9925 : /* No clobbers */);
9926 return result;
9929 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9930 vmovq_n_s16 (int32_t a)
9932 int16x8_t result;
9933 __asm__ ("dup %0.8h,%w1"
9934 : "=w"(result)
9935 : "r"(a)
9936 : /* No clobbers */);
9937 return result;
9940 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9941 vmovq_n_s32 (int32_t a)
9943 int32x4_t result;
9944 __asm__ ("dup %0.4s,%w1"
9945 : "=w"(result)
9946 : "r"(a)
9947 : /* No clobbers */);
9948 return result;
9951 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9952 vmovq_n_s64 (int64_t a)
9954 int64x2_t result;
9955 __asm__ ("dup %0.2d,%x1"
9956 : "=w"(result)
9957 : "r"(a)
9958 : /* No clobbers */);
9959 return result;
9962 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9963 vmovq_n_u8 (uint32_t a)
9965 uint8x16_t result;
9966 __asm__ ("dup %0.16b,%w1"
9967 : "=w"(result)
9968 : "r"(a)
9969 : /* No clobbers */);
9970 return result;
9973 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9974 vmovq_n_u16 (uint32_t a)
9976 uint16x8_t result;
9977 __asm__ ("dup %0.8h,%w1"
9978 : "=w"(result)
9979 : "r"(a)
9980 : /* No clobbers */);
9981 return result;
9984 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9985 vmovq_n_u32 (uint32_t a)
9987 uint32x4_t result;
9988 __asm__ ("dup %0.4s,%w1"
9989 : "=w"(result)
9990 : "r"(a)
9991 : /* No clobbers */);
9992 return result;
9995 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9996 vmovq_n_u64 (uint64_t a)
9998 uint64x2_t result;
9999 __asm__ ("dup %0.2d,%x1"
10000 : "=w"(result)
10001 : "r"(a)
10002 : /* No clobbers */);
10003 return result;
/* vmul_lane_*: multiply every lane of A by lane C of the 64-bit vector B.
   These are macros (not inlines) because the lane number C must be an
   assemble-time immediate, enforced by the "i" constraint.  */

#define vmul_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmul %0.2s,%1.2s,%2.s[%3]"                             \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mul %0.2s,%1.2s,%2.s[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mul %0.4h,%1.4h,%2.h[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmul_laneq_*: as vmul_lane_* but the lane source B is a 128-bit
   vector ("q" lane form), so C may index up to twice as many lanes.  */

#define vmul_laneq_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmul %0.2s, %1.2s, %2.s[%3]"                           \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("mul %0.4h, %1.4h, %2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmul_laneq_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("mul %0.2s, %1.2s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10136 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10137 vmul_n_f32 (float32x2_t a, float32_t b)
10139 float32x2_t result;
10140 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
10141 : "=w"(result)
10142 : "w"(a), "w"(b)
10143 : /* No clobbers */);
10144 return result;
10147 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10148 vmul_n_s16 (int16x4_t a, int16_t b)
10150 int16x4_t result;
10151 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
10152 : "=w"(result)
10153 : "w"(a), "w"(b)
10154 : /* No clobbers */);
10155 return result;
10158 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10159 vmul_n_s32 (int32x2_t a, int32_t b)
10161 int32x2_t result;
10162 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
10163 : "=w"(result)
10164 : "w"(a), "w"(b)
10165 : /* No clobbers */);
10166 return result;
10169 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10170 vmul_n_u16 (uint16x4_t a, uint16_t b)
10172 uint16x4_t result;
10173 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
10174 : "=w"(result)
10175 : "w"(a), "w"(b)
10176 : /* No clobbers */);
10177 return result;
10180 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10181 vmul_n_u32 (uint32x2_t a, uint32_t b)
10183 uint32x2_t result;
10184 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
10185 : "=w"(result)
10186 : "w"(a), "w"(b)
10187 : /* No clobbers */);
10188 return result;
/* vmuld_lane_f64: multiply scalar A by lane C of B (scalar FMUL
   by-element form).  NOTE(review): B is typed float64x2_t here, which
   matches this header version's convention — verify against ACLE before
   changing.  */
#define vmuld_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64_t result;                                                \
       __asm__ ("fmul %d0,%d1,%2.d[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_lane_*: widening multiply of the HIGH half of A by lane C
   of B (SMULL2/UMULL2 by-element).  C must be a compile-time constant
   ("i" constraint).  */

#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_laneq_*: widening multiply of the HIGH half of A by lane C
   of a 128-bit vector B (SMULL2/UMULL2 by-element, "q" lane form).  */

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10308 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10309 vmull_high_n_s16 (int16x8_t a, int16_t b)
10311 int32x4_t result;
10312 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
10313 : "=w"(result)
10314 : "w"(a), "w"(b)
10315 : /* No clobbers */);
10316 return result;
10319 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10320 vmull_high_n_s32 (int32x4_t a, int32_t b)
10322 int64x2_t result;
10323 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
10324 : "=w"(result)
10325 : "w"(a), "w"(b)
10326 : /* No clobbers */);
10327 return result;
10330 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10331 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
10333 uint32x4_t result;
10334 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
10335 : "=w"(result)
10336 : "w"(a), "w"(b)
10337 : /* No clobbers */);
10338 return result;
10341 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10342 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
10344 uint64x2_t result;
10345 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
10346 : "=w"(result)
10347 : "w"(a), "w"(b)
10348 : /* No clobbers */);
10349 return result;
10352 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10353 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
10355 poly16x8_t result;
10356 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
10357 : "=w"(result)
10358 : "w"(a), "w"(b)
10359 : /* No clobbers */);
10360 return result;
10363 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10364 vmull_high_s8 (int8x16_t a, int8x16_t b)
10366 int16x8_t result;
10367 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
10368 : "=w"(result)
10369 : "w"(a), "w"(b)
10370 : /* No clobbers */);
10371 return result;
10374 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10375 vmull_high_s16 (int16x8_t a, int16x8_t b)
10377 int32x4_t result;
10378 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
10379 : "=w"(result)
10380 : "w"(a), "w"(b)
10381 : /* No clobbers */);
10382 return result;
10385 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10386 vmull_high_s32 (int32x4_t a, int32x4_t b)
10388 int64x2_t result;
10389 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
10390 : "=w"(result)
10391 : "w"(a), "w"(b)
10392 : /* No clobbers */);
10393 return result;
10396 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10397 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
10399 uint16x8_t result;
10400 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
10401 : "=w"(result)
10402 : "w"(a), "w"(b)
10403 : /* No clobbers */);
10404 return result;
10407 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10408 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
10410 uint32x4_t result;
10411 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
10412 : "=w"(result)
10413 : "w"(a), "w"(b)
10414 : /* No clobbers */);
10415 return result;
10418 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10419 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
10421 uint64x2_t result;
10422 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
10423 : "=w"(result)
10424 : "w"(a), "w"(b)
10425 : /* No clobbers */);
10426 return result;
/* vmull_lane_*: widening multiply of A by lane C of the 64-bit vector B
   (SMULL/UMULL by-element).  C must be a compile-time constant.  */

#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_laneq_*: widening multiply of A by lane C of a 128-bit vector B
   ("q" lane form of SMULL/UMULL by-element).  */

#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10533 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10534 vmull_n_s16 (int16x4_t a, int16_t b)
10536 int32x4_t result;
10537 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
10538 : "=w"(result)
10539 : "w"(a), "w"(b)
10540 : /* No clobbers */);
10541 return result;
10544 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10545 vmull_n_s32 (int32x2_t a, int32_t b)
10547 int64x2_t result;
10548 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
10549 : "=w"(result)
10550 : "w"(a), "w"(b)
10551 : /* No clobbers */);
10552 return result;
10555 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10556 vmull_n_u16 (uint16x4_t a, uint16_t b)
10558 uint32x4_t result;
10559 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
10560 : "=w"(result)
10561 : "w"(a), "w"(b)
10562 : /* No clobbers */);
10563 return result;
10566 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10567 vmull_n_u32 (uint32x2_t a, uint32_t b)
10569 uint64x2_t result;
10570 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
10571 : "=w"(result)
10572 : "w"(a), "w"(b)
10573 : /* No clobbers */);
10574 return result;
10577 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
10578 vmull_p8 (poly8x8_t a, poly8x8_t b)
10580 poly16x8_t result;
10581 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
10582 : "=w"(result)
10583 : "w"(a), "w"(b)
10584 : /* No clobbers */);
10585 return result;
10588 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10589 vmull_s8 (int8x8_t a, int8x8_t b)
10591 int16x8_t result;
10592 __asm__ ("smull %0.8h, %1.8b, %2.8b"
10593 : "=w"(result)
10594 : "w"(a), "w"(b)
10595 : /* No clobbers */);
10596 return result;
10599 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10600 vmull_s16 (int16x4_t a, int16x4_t b)
10602 int32x4_t result;
10603 __asm__ ("smull %0.4s, %1.4h, %2.4h"
10604 : "=w"(result)
10605 : "w"(a), "w"(b)
10606 : /* No clobbers */);
10607 return result;
10610 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
10611 vmull_s32 (int32x2_t a, int32x2_t b)
10613 int64x2_t result;
10614 __asm__ ("smull %0.2d, %1.2s, %2.2s"
10615 : "=w"(result)
10616 : "w"(a), "w"(b)
10617 : /* No clobbers */);
10618 return result;
10621 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10622 vmull_u8 (uint8x8_t a, uint8x8_t b)
10624 uint16x8_t result;
10625 __asm__ ("umull %0.8h, %1.8b, %2.8b"
10626 : "=w"(result)
10627 : "w"(a), "w"(b)
10628 : /* No clobbers */);
10629 return result;
10632 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10633 vmull_u16 (uint16x4_t a, uint16x4_t b)
10635 uint32x4_t result;
10636 __asm__ ("umull %0.4s, %1.4h, %2.4h"
10637 : "=w"(result)
10638 : "w"(a), "w"(b)
10639 : /* No clobbers */);
10640 return result;
10643 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
10644 vmull_u32 (uint32x2_t a, uint32x2_t b)
10646 uint64x2_t result;
10647 __asm__ ("umull %0.2d, %1.2s, %2.2s"
10648 : "=w"(result)
10649 : "w"(a), "w"(b)
10650 : /* No clobbers */);
10651 return result;
/* vmulq_lane_*: multiply every lane of the 128-bit vector A by lane C
   of the 64-bit vector B (FMUL/MUL by-element).  */

#define vmulq_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]"                           \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]"                             \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("mul %0.4s,%1.4s,%2.s[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("mul %0.8h,%1.8h,%2.h[%3]"                              \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmulq_laneq_*: multiply every lane of the 128-bit vector A by lane C
   of the 128-bit vector B (FMUL/MUL by-element, "q" lane form).  */

#define vmulq_laneq_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("fmul %0.4s, %1.4s, %2.s[%3]"                           \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_laneq_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("fmul %0.2d,%1.2d,%2.d[%3]"                             \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int16x8_t result;                                                \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint16x8_t result;                                               \
       __asm__ ("mul %0.8h, %1.8h, %2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulq_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("mul %0.4s, %1.4s, %2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10810 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10811 vmulq_n_f32 (float32x4_t a, float32_t b)
10813 float32x4_t result;
10814 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
10815 : "=w"(result)
10816 : "w"(a), "w"(b)
10817 : /* No clobbers */);
10818 return result;
10821 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10822 vmulq_n_f64 (float64x2_t a, float64_t b)
10824 float64x2_t result;
10825 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
10826 : "=w"(result)
10827 : "w"(a), "w"(b)
10828 : /* No clobbers */);
10829 return result;
10832 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10833 vmulq_n_s16 (int16x8_t a, int16_t b)
10835 int16x8_t result;
10836 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10837 : "=w"(result)
10838 : "w"(a), "w"(b)
10839 : /* No clobbers */);
10840 return result;
10843 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10844 vmulq_n_s32 (int32x4_t a, int32_t b)
10846 int32x4_t result;
10847 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10848 : "=w"(result)
10849 : "w"(a), "w"(b)
10850 : /* No clobbers */);
10851 return result;
10854 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10855 vmulq_n_u16 (uint16x8_t a, uint16_t b)
10857 uint16x8_t result;
10858 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
10859 : "=w"(result)
10860 : "w"(a), "w"(b)
10861 : /* No clobbers */);
10862 return result;
10865 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10866 vmulq_n_u32 (uint32x4_t a, uint32_t b)
10868 uint32x4_t result;
10869 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
10870 : "=w"(result)
10871 : "w"(a), "w"(b)
10872 : /* No clobbers */);
10873 return result;
/* vmuls_lane_f32: multiply scalar A by lane C of B (scalar FMUL
   by-element).  NOTE(review): B is typed float32x4_t here, matching
   this header version's convention — verify against ACLE before
   changing.  */
#define vmuls_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32_t result;                                                \
       __asm__ ("fmul %s0,%s1,%2.s[%3]"                                 \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10889 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10890 vmulx_f32 (float32x2_t a, float32x2_t b)
10892 float32x2_t result;
10893 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
10894 : "=w"(result)
10895 : "w"(a), "w"(b)
10896 : /* No clobbers */);
10897 return result;
/* vmulx_lane_f32: FMULX of every lane of A by lane C of B.
   NOTE(review): B is typed float32x4_t in this header version.  */
#define vmulx_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x2_t a_ = (a);                                            \
       float32x2_t result;                                              \
       __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10913 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10914 vmulxd_f64 (float64_t a, float64_t b)
10916 float64_t result;
10917 __asm__ ("fmulx %d0, %d1, %d2"
10918 : "=w"(result)
10919 : "w"(a), "w"(b)
10920 : /* No clobbers */);
10921 return result;
10924 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10925 vmulxq_f32 (float32x4_t a, float32x4_t b)
10927 float32x4_t result;
10928 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
10929 : "=w"(result)
10930 : "w"(a), "w"(b)
10931 : /* No clobbers */);
10932 return result;
10935 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10936 vmulxq_f64 (float64x2_t a, float64x2_t b)
10938 float64x2_t result;
10939 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
10940 : "=w"(result)
10941 : "w"(a), "w"(b)
10942 : /* No clobbers */);
10943 return result;
/* vmulxq_lane_*: FMULX of every lane of the 128-bit vector A by lane C
   of B.  NOTE(review): B is typed as a 128-bit vector in both variants
   in this header version.  */

#define vmulxq_lane_f32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32x4_t a_ = (a);                                            \
       float32x4_t result;                                              \
       __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmulxq_lane_f64(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64x2_t a_ = (a);                                            \
       float64x2_t result;                                              \
       __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
10972 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10973 vmulxs_f32 (float32_t a, float32_t b)
10975 float32_t result;
10976 __asm__ ("fmulx %s0, %s1, %s2"
10977 : "=w"(result)
10978 : "w"(a), "w"(b)
10979 : /* No clobbers */);
10980 return result;
/* vmvn/vmvnq family: bitwise NOT of every element (MVN).  The operation
   is type-agnostic, so all variants use the byte arrangement (.8b for the
   64-bit vmvn_* forms, .16b for the 128-bit vmvnq_* forms); only the C
   element type of the argument and result differs between variants.  */

10983 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
10984 vmvn_p8 (poly8x8_t a)
10986   poly8x8_t result;
10987   __asm__ ("mvn %0.8b,%1.8b"
10988            : "=w"(result)
10989            : "w"(a)
10990            : /* No clobbers */);
10991   return result;
10994 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10995 vmvn_s8 (int8x8_t a)
10997   int8x8_t result;
10998   __asm__ ("mvn %0.8b,%1.8b"
10999            : "=w"(result)
11000            : "w"(a)
11001            : /* No clobbers */);
11002   return result;
11005 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11006 vmvn_s16 (int16x4_t a)
11008   int16x4_t result;
11009   __asm__ ("mvn %0.8b,%1.8b"
11010            : "=w"(result)
11011            : "w"(a)
11012            : /* No clobbers */);
11013   return result;
11016 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11017 vmvn_s32 (int32x2_t a)
11019   int32x2_t result;
11020   __asm__ ("mvn %0.8b,%1.8b"
11021            : "=w"(result)
11022            : "w"(a)
11023            : /* No clobbers */);
11024   return result;
11027 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11028 vmvn_u8 (uint8x8_t a)
11030   uint8x8_t result;
11031   __asm__ ("mvn %0.8b,%1.8b"
11032            : "=w"(result)
11033            : "w"(a)
11034            : /* No clobbers */);
11035   return result;
11038 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11039 vmvn_u16 (uint16x4_t a)
11041   uint16x4_t result;
11042   __asm__ ("mvn %0.8b,%1.8b"
11043            : "=w"(result)
11044            : "w"(a)
11045            : /* No clobbers */);
11046   return result;
11049 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11050 vmvn_u32 (uint32x2_t a)
11052   uint32x2_t result;
11053   __asm__ ("mvn %0.8b,%1.8b"
11054            : "=w"(result)
11055            : "w"(a)
11056            : /* No clobbers */);
11057   return result;
/* 128-bit (quad-register) variants: identical operation over 16 bytes.  */
11060 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
11061 vmvnq_p8 (poly8x16_t a)
11063   poly8x16_t result;
11064   __asm__ ("mvn %0.16b,%1.16b"
11065            : "=w"(result)
11066            : "w"(a)
11067            : /* No clobbers */);
11068   return result;
11071 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11072 vmvnq_s8 (int8x16_t a)
11074   int8x16_t result;
11075   __asm__ ("mvn %0.16b,%1.16b"
11076            : "=w"(result)
11077            : "w"(a)
11078            : /* No clobbers */);
11079   return result;
11082 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11083 vmvnq_s16 (int16x8_t a)
11085   int16x8_t result;
11086   __asm__ ("mvn %0.16b,%1.16b"
11087            : "=w"(result)
11088            : "w"(a)
11089            : /* No clobbers */);
11090   return result;
11093 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11094 vmvnq_s32 (int32x4_t a)
11096   int32x4_t result;
11097   __asm__ ("mvn %0.16b,%1.16b"
11098            : "=w"(result)
11099            : "w"(a)
11100            : /* No clobbers */);
11101   return result;
11104 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11105 vmvnq_u8 (uint8x16_t a)
11107   uint8x16_t result;
11108   __asm__ ("mvn %0.16b,%1.16b"
11109            : "=w"(result)
11110            : "w"(a)
11111            : /* No clobbers */);
11112   return result;
11115 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11116 vmvnq_u16 (uint16x8_t a)
11118   uint16x8_t result;
11119   __asm__ ("mvn %0.16b,%1.16b"
11120            : "=w"(result)
11121            : "w"(a)
11122            : /* No clobbers */);
11123   return result;
11126 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11127 vmvnq_u32 (uint32x4_t a)
11129   uint32x4_t result;
11130   __asm__ ("mvn %0.16b,%1.16b"
11131            : "=w"(result)
11132            : "w"(a)
11133            : /* No clobbers */);
11134   return result;
/* vneg/vnegq family: per-element negation.  Floating-point variants emit
   FNEG (sign-bit flip); integer variants emit NEG (two's-complement
   negate).  The arrangement specifier (.2s, .8b, .4h, ...) matches the
   element type, unlike the type-agnostic MVN wrappers above.  */

11137 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11138 vneg_f32 (float32x2_t a)
11140   float32x2_t result;
11141   __asm__ ("fneg %0.2s,%1.2s"
11142            : "=w"(result)
11143            : "w"(a)
11144            : /* No clobbers */);
11145   return result;
11148 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11149 vneg_s8 (int8x8_t a)
11151   int8x8_t result;
11152   __asm__ ("neg %0.8b,%1.8b"
11153            : "=w"(result)
11154            : "w"(a)
11155            : /* No clobbers */);
11156   return result;
11159 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11160 vneg_s16 (int16x4_t a)
11162   int16x4_t result;
11163   __asm__ ("neg %0.4h,%1.4h"
11164            : "=w"(result)
11165            : "w"(a)
11166            : /* No clobbers */);
11167   return result;
11170 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11171 vneg_s32 (int32x2_t a)
11173   int32x2_t result;
11174   __asm__ ("neg %0.2s,%1.2s"
11175            : "=w"(result)
11176            : "w"(a)
11177            : /* No clobbers */);
11178   return result;
/* 128-bit (quad-register) variants.  */
11181 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11182 vnegq_f32 (float32x4_t a)
11184   float32x4_t result;
11185   __asm__ ("fneg %0.4s,%1.4s"
11186            : "=w"(result)
11187            : "w"(a)
11188            : /* No clobbers */);
11189   return result;
11192 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11193 vnegq_f64 (float64x2_t a)
11195   float64x2_t result;
11196   __asm__ ("fneg %0.2d,%1.2d"
11197            : "=w"(result)
11198            : "w"(a)
11199            : /* No clobbers */);
11200   return result;
11203 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11204 vnegq_s8 (int8x16_t a)
11206   int8x16_t result;
11207   __asm__ ("neg %0.16b,%1.16b"
11208            : "=w"(result)
11209            : "w"(a)
11210            : /* No clobbers */);
11211   return result;
11214 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11215 vnegq_s16 (int16x8_t a)
11217   int16x8_t result;
11218   __asm__ ("neg %0.8h,%1.8h"
11219            : "=w"(result)
11220            : "w"(a)
11221            : /* No clobbers */);
11222   return result;
11225 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11226 vnegq_s32 (int32x4_t a)
11228   int32x4_t result;
11229   __asm__ ("neg %0.4s,%1.4s"
11230            : "=w"(result)
11231            : "w"(a)
11232            : /* No clobbers */);
11233   return result;
11236 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11237 vnegq_s64 (int64x2_t a)
11239   int64x2_t result;
11240   __asm__ ("neg %0.2d,%1.2d"
11241            : "=w"(result)
11242            : "w"(a)
11243            : /* No clobbers */);
11244   return result;
/* vpadal/vpadalq family: pairwise add-long and accumulate (SADALP for
   signed, UADALP for unsigned).  Adjacent pairs of elements in b are
   widened, summed, and added into the accumulator a.  The "0"(a)
   constraint ties a to the output register so the instruction's
   read-modify-write of operand 0 accumulates into a; b is operand %2.  */

11247 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11248 vpadal_s8 (int16x4_t a, int8x8_t b)
11250   int16x4_t result;
11251   __asm__ ("sadalp %0.4h,%2.8b"
11252            : "=w"(result)
11253            : "0"(a), "w"(b)
11254            : /* No clobbers */);
11255   return result;
11258 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11259 vpadal_s16 (int32x2_t a, int16x4_t b)
11261   int32x2_t result;
11262   __asm__ ("sadalp %0.2s,%2.4h"
11263            : "=w"(result)
11264            : "0"(a), "w"(b)
11265            : /* No clobbers */);
11266   return result;
11269 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
11270 vpadal_s32 (int64x1_t a, int32x2_t b)
11272   int64x1_t result;
11273   __asm__ ("sadalp %0.1d,%2.2s"
11274            : "=w"(result)
11275            : "0"(a), "w"(b)
11276            : /* No clobbers */);
11277   return result;
11280 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11281 vpadal_u8 (uint16x4_t a, uint8x8_t b)
11283   uint16x4_t result;
11284   __asm__ ("uadalp %0.4h,%2.8b"
11285            : "=w"(result)
11286            : "0"(a), "w"(b)
11287            : /* No clobbers */);
11288   return result;
11291 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11292 vpadal_u16 (uint32x2_t a, uint16x4_t b)
11294   uint32x2_t result;
11295   __asm__ ("uadalp %0.2s,%2.4h"
11296            : "=w"(result)
11297            : "0"(a), "w"(b)
11298            : /* No clobbers */);
11299   return result;
11302 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11303 vpadal_u32 (uint64x1_t a, uint32x2_t b)
11305   uint64x1_t result;
11306   __asm__ ("uadalp %0.1d,%2.2s"
11307            : "=w"(result)
11308            : "0"(a), "w"(b)
11309            : /* No clobbers */);
11310   return result;
/* 128-bit (quad-register) variants.  */
11313 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11314 vpadalq_s8 (int16x8_t a, int8x16_t b)
11316   int16x8_t result;
11317   __asm__ ("sadalp %0.8h,%2.16b"
11318            : "=w"(result)
11319            : "0"(a), "w"(b)
11320            : /* No clobbers */);
11321   return result;
11324 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11325 vpadalq_s16 (int32x4_t a, int16x8_t b)
11327   int32x4_t result;
11328   __asm__ ("sadalp %0.4s,%2.8h"
11329            : "=w"(result)
11330            : "0"(a), "w"(b)
11331            : /* No clobbers */);
11332   return result;
11335 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11336 vpadalq_s32 (int64x2_t a, int32x4_t b)
11338   int64x2_t result;
11339   __asm__ ("sadalp %0.2d,%2.4s"
11340            : "=w"(result)
11341            : "0"(a), "w"(b)
11342            : /* No clobbers */);
11343   return result;
11346 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11347 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
11349   uint16x8_t result;
11350   __asm__ ("uadalp %0.8h,%2.16b"
11351            : "=w"(result)
11352            : "0"(a), "w"(b)
11353            : /* No clobbers */);
11354   return result;
11357 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11358 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
11360   uint32x4_t result;
11361   __asm__ ("uadalp %0.4s,%2.8h"
11362            : "=w"(result)
11363            : "0"(a), "w"(b)
11364            : /* No clobbers */);
11365   return result;
11368 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11369 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
11371   uint64x2_t result;
11372   __asm__ ("uadalp %0.2d,%2.4s"
11373            : "=w"(result)
11374            : "0"(a), "w"(b)
11375            : /* No clobbers */);
11376   return result;
/* vpadd family: pairwise addition of adjacent elements across the
   concatenation of a and b.  The float variant is written with inline
   asm (FADDP); the integer variants instead call the compiler builtin
   __builtin_aarch64_addp*, with unsigned forms cast through the signed
   builtin since pairwise add is sign-agnostic.  vpaddd_f64 reduces the
   two lanes of a double-precision vector to a scalar.  */

11379 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11380 vpadd_f32 (float32x2_t a, float32x2_t b)
11382   float32x2_t result;
11383   __asm__ ("faddp %0.2s,%1.2s,%2.2s"
11384            : "=w"(result)
11385            : "w"(a), "w"(b)
11386            : /* No clobbers */);
11387   return result;
11390 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11391 vpadd_s8 (int8x8_t __a, int8x8_t __b)
11393   return __builtin_aarch64_addpv8qi (__a, __b);
11396 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11397 vpadd_s16 (int16x4_t __a, int16x4_t __b)
11399   return __builtin_aarch64_addpv4hi (__a, __b);
11402 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11403 vpadd_s32 (int32x2_t __a, int32x2_t __b)
11405   return __builtin_aarch64_addpv2si (__a, __b);
11408 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11409 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
11411   return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
11412                                                  (int8x8_t) __b);
11415 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11416 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
11418   return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
11419                                                   (int16x4_t) __b);
11422 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11423 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
11425   return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
11426                                                   (int32x2_t) __b);
/* Pairwise-add the two lanes of a into a scalar double (FADDP %d0).  */
11429 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11430 vpaddd_f64 (float64x2_t a)
11432   float64_t result;
11433   __asm__ ("faddp %d0,%1.2d"
11434            : "=w"(result)
11435            : "w"(a)
11436            : /* No clobbers */);
11437   return result;
/* vpaddl/vpaddlq family: pairwise add-long without accumulation (SADDLP
   signed, UADDLP unsigned).  Adjacent pairs of elements are widened and
   summed, halving the element count while doubling the element width.
   Unlike vpadal above, there is no accumulator input.  */

11440 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11441 vpaddl_s8 (int8x8_t a)
11443   int16x4_t result;
11444   __asm__ ("saddlp %0.4h,%1.8b"
11445            : "=w"(result)
11446            : "w"(a)
11447            : /* No clobbers */);
11448   return result;
11451 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11452 vpaddl_s16 (int16x4_t a)
11454   int32x2_t result;
11455   __asm__ ("saddlp %0.2s,%1.4h"
11456            : "=w"(result)
11457            : "w"(a)
11458            : /* No clobbers */);
11459   return result;
11462 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
11463 vpaddl_s32 (int32x2_t a)
11465   int64x1_t result;
11466   __asm__ ("saddlp %0.1d,%1.2s"
11467            : "=w"(result)
11468            : "w"(a)
11469            : /* No clobbers */);
11470   return result;
11473 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11474 vpaddl_u8 (uint8x8_t a)
11476   uint16x4_t result;
11477   __asm__ ("uaddlp %0.4h,%1.8b"
11478            : "=w"(result)
11479            : "w"(a)
11480            : /* No clobbers */);
11481   return result;
11484 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11485 vpaddl_u16 (uint16x4_t a)
11487   uint32x2_t result;
11488   __asm__ ("uaddlp %0.2s,%1.4h"
11489            : "=w"(result)
11490            : "w"(a)
11491            : /* No clobbers */);
11492   return result;
11495 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
11496 vpaddl_u32 (uint32x2_t a)
11498   uint64x1_t result;
11499   __asm__ ("uaddlp %0.1d,%1.2s"
11500            : "=w"(result)
11501            : "w"(a)
11502            : /* No clobbers */);
11503   return result;
/* 128-bit (quad-register) variants.  */
11506 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11507 vpaddlq_s8 (int8x16_t a)
11509   int16x8_t result;
11510   __asm__ ("saddlp %0.8h,%1.16b"
11511            : "=w"(result)
11512            : "w"(a)
11513            : /* No clobbers */);
11514   return result;
11517 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11518 vpaddlq_s16 (int16x8_t a)
11520   int32x4_t result;
11521   __asm__ ("saddlp %0.4s,%1.8h"
11522            : "=w"(result)
11523            : "w"(a)
11524            : /* No clobbers */);
11525   return result;
11528 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11529 vpaddlq_s32 (int32x4_t a)
11531   int64x2_t result;
11532   __asm__ ("saddlp %0.2d,%1.4s"
11533            : "=w"(result)
11534            : "w"(a)
11535            : /* No clobbers */);
11536   return result;
11539 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11540 vpaddlq_u8 (uint8x16_t a)
11542   uint16x8_t result;
11543   __asm__ ("uaddlp %0.8h,%1.16b"
11544            : "=w"(result)
11545            : "w"(a)
11546            : /* No clobbers */);
11547   return result;
11550 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11551 vpaddlq_u16 (uint16x8_t a)
11553   uint32x4_t result;
11554   __asm__ ("uaddlp %0.4s,%1.8h"
11555            : "=w"(result)
11556            : "w"(a)
11557            : /* No clobbers */);
11558   return result;
11561 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11562 vpaddlq_u32 (uint32x4_t a)
11564   uint64x2_t result;
11565   __asm__ ("uaddlp %0.2d,%1.4s"
11566            : "=w"(result)
11567            : "w"(a)
11568            : /* No clobbers */);
11569   return result;
/* vpaddq family: 128-bit pairwise addition (FADDP for floats, ADDP for
   integers) across the concatenation of a and b.  The integer forms use
   the sign-agnostic byte/halfword/word/doubleword arrangements, so the
   signed and unsigned variants emit the same instruction.  vpadds_f32
   reduces the two lanes of a float32x2_t to a scalar (FADDP %s0).  */

11572 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11573 vpaddq_f32 (float32x4_t a, float32x4_t b)
11575   float32x4_t result;
11576   __asm__ ("faddp %0.4s,%1.4s,%2.4s"
11577            : "=w"(result)
11578            : "w"(a), "w"(b)
11579            : /* No clobbers */);
11580   return result;
11583 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11584 vpaddq_f64 (float64x2_t a, float64x2_t b)
11586   float64x2_t result;
11587   __asm__ ("faddp %0.2d,%1.2d,%2.2d"
11588            : "=w"(result)
11589            : "w"(a), "w"(b)
11590            : /* No clobbers */);
11591   return result;
11594 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11595 vpaddq_s8 (int8x16_t a, int8x16_t b)
11597   int8x16_t result;
11598   __asm__ ("addp %0.16b,%1.16b,%2.16b"
11599            : "=w"(result)
11600            : "w"(a), "w"(b)
11601            : /* No clobbers */);
11602   return result;
11605 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11606 vpaddq_s16 (int16x8_t a, int16x8_t b)
11608   int16x8_t result;
11609   __asm__ ("addp %0.8h,%1.8h,%2.8h"
11610            : "=w"(result)
11611            : "w"(a), "w"(b)
11612            : /* No clobbers */);
11613   return result;
11616 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11617 vpaddq_s32 (int32x4_t a, int32x4_t b)
11619   int32x4_t result;
11620   __asm__ ("addp %0.4s,%1.4s,%2.4s"
11621            : "=w"(result)
11622            : "w"(a), "w"(b)
11623            : /* No clobbers */);
11624   return result;
11627 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
11628 vpaddq_s64 (int64x2_t a, int64x2_t b)
11630   int64x2_t result;
11631   __asm__ ("addp %0.2d,%1.2d,%2.2d"
11632            : "=w"(result)
11633            : "w"(a), "w"(b)
11634            : /* No clobbers */);
11635   return result;
11638 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11639 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
11641   uint8x16_t result;
11642   __asm__ ("addp %0.16b,%1.16b,%2.16b"
11643            : "=w"(result)
11644            : "w"(a), "w"(b)
11645            : /* No clobbers */);
11646   return result;
11649 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11650 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
11652   uint16x8_t result;
11653   __asm__ ("addp %0.8h,%1.8h,%2.8h"
11654            : "=w"(result)
11655            : "w"(a), "w"(b)
11656            : /* No clobbers */);
11657   return result;
11660 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11661 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
11663   uint32x4_t result;
11664   __asm__ ("addp %0.4s,%1.4s,%2.4s"
11665            : "=w"(result)
11666            : "w"(a), "w"(b)
11667            : /* No clobbers */);
11668   return result;
11671 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
11672 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
11674   uint64x2_t result;
11675   __asm__ ("addp %0.2d,%1.2d,%2.2d"
11676            : "=w"(result)
11677            : "w"(a), "w"(b)
11678            : /* No clobbers */);
11679   return result;
/* Pairwise-add the two lanes of a into a scalar float.  */
11682 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11683 vpadds_f32 (float32x2_t a)
11685   float32_t result;
11686   __asm__ ("faddp %s0,%1.2s"
11687            : "=w"(result)
11688            : "w"(a)
11689            : /* No clobbers */);
11690   return result;
/* vpmax family: pairwise maximum.  FMAXP for floats, SMAXP/UMAXP for
   signed/unsigned integers; adjacent pairs across the concatenation of a
   and b are reduced to their maximum.  The vpmaxnm* variants use FMAXNMP,
   which implements the IEEE 754-2008 maxNum rule (prefers a number over a
   quiet NaN).  The *qd/*s forms reduce a whole vector to a scalar.  */

11693 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11694 vpmax_f32 (float32x2_t a, float32x2_t b)
11696   float32x2_t result;
11697   __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
11698            : "=w"(result)
11699            : "w"(a), "w"(b)
11700            : /* No clobbers */);
11701   return result;
11704 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11705 vpmax_s8 (int8x8_t a, int8x8_t b)
11707   int8x8_t result;
11708   __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
11709            : "=w"(result)
11710            : "w"(a), "w"(b)
11711            : /* No clobbers */);
11712   return result;
11715 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11716 vpmax_s16 (int16x4_t a, int16x4_t b)
11718   int16x4_t result;
11719   __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
11720            : "=w"(result)
11721            : "w"(a), "w"(b)
11722            : /* No clobbers */);
11723   return result;
11726 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11727 vpmax_s32 (int32x2_t a, int32x2_t b)
11729   int32x2_t result;
11730   __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
11731            : "=w"(result)
11732            : "w"(a), "w"(b)
11733            : /* No clobbers */);
11734   return result;
11737 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11738 vpmax_u8 (uint8x8_t a, uint8x8_t b)
11740   uint8x8_t result;
11741   __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
11742            : "=w"(result)
11743            : "w"(a), "w"(b)
11744            : /* No clobbers */);
11745   return result;
11748 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11749 vpmax_u16 (uint16x4_t a, uint16x4_t b)
11751   uint16x4_t result;
11752   __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
11753            : "=w"(result)
11754            : "w"(a), "w"(b)
11755            : /* No clobbers */);
11756   return result;
11759 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11760 vpmax_u32 (uint32x2_t a, uint32x2_t b)
11762   uint32x2_t result;
11763   __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
11764            : "=w"(result)
11765            : "w"(a), "w"(b)
11766            : /* No clobbers */);
11767   return result;
/* Pairwise maxNum (NaN-suppressing) variants: FMAXNMP.  */
11770 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11771 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
11773   float32x2_t result;
11774   __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
11775            : "=w"(result)
11776            : "w"(a), "w"(b)
11777            : /* No clobbers */);
11778   return result;
11781 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11782 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
11784   float32x4_t result;
11785   __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
11786            : "=w"(result)
11787            : "w"(a), "w"(b)
11788            : /* No clobbers */);
11789   return result;
11792 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11793 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
11795   float64x2_t result;
11796   __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
11797            : "=w"(result)
11798            : "w"(a), "w"(b)
11799            : /* No clobbers */);
11800   return result;
/* Scalar reductions: maxNum of the two lanes of a.  */
11803 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11804 vpmaxnmqd_f64 (float64x2_t a)
11806   float64_t result;
11807   __asm__ ("fmaxnmp %d0,%1.2d"
11808            : "=w"(result)
11809            : "w"(a)
11810            : /* No clobbers */);
11811   return result;
11814 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11815 vpmaxnms_f32 (float32x2_t a)
11817   float32_t result;
11818   __asm__ ("fmaxnmp %s0,%1.2s"
11819            : "=w"(result)
11820            : "w"(a)
11821            : /* No clobbers */);
11822   return result;
/* 128-bit (quad-register) pairwise-maximum variants.  */
11825 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
11826 vpmaxq_f32 (float32x4_t a, float32x4_t b)
11828   float32x4_t result;
11829   __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
11830            : "=w"(result)
11831            : "w"(a), "w"(b)
11832            : /* No clobbers */);
11833   return result;
11836 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
11837 vpmaxq_f64 (float64x2_t a, float64x2_t b)
11839   float64x2_t result;
11840   __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
11841            : "=w"(result)
11842            : "w"(a), "w"(b)
11843            : /* No clobbers */);
11844   return result;
11847 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11848 vpmaxq_s8 (int8x16_t a, int8x16_t b)
11850   int8x16_t result;
11851   __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
11852            : "=w"(result)
11853            : "w"(a), "w"(b)
11854            : /* No clobbers */);
11855   return result;
11858 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11859 vpmaxq_s16 (int16x8_t a, int16x8_t b)
11861   int16x8_t result;
11862   __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
11863            : "=w"(result)
11864            : "w"(a), "w"(b)
11865            : /* No clobbers */);
11866   return result;
11869 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11870 vpmaxq_s32 (int32x4_t a, int32x4_t b)
11872   int32x4_t result;
11873   __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
11874            : "=w"(result)
11875            : "w"(a), "w"(b)
11876            : /* No clobbers */);
11877   return result;
11880 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11881 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
11883   uint8x16_t result;
11884   __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
11885            : "=w"(result)
11886            : "w"(a), "w"(b)
11887            : /* No clobbers */);
11888   return result;
11891 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11892 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
11894   uint16x8_t result;
11895   __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
11896            : "=w"(result)
11897            : "w"(a), "w"(b)
11898            : /* No clobbers */);
11899   return result;
11902 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11903 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
11905   uint32x4_t result;
11906   __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
11907            : "=w"(result)
11908            : "w"(a), "w"(b)
11909            : /* No clobbers */);
11910   return result;
/* Scalar reductions: plain pairwise maximum of the two lanes of a.  */
11913 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
11914 vpmaxqd_f64 (float64x2_t a)
11916   float64_t result;
11917   __asm__ ("fmaxp %d0,%1.2d"
11918            : "=w"(result)
11919            : "w"(a)
11920            : /* No clobbers */);
11921   return result;
11924 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
11925 vpmaxs_f32 (float32x2_t a)
11927   float32_t result;
11928   __asm__ ("fmaxp %s0,%1.2s"
11929            : "=w"(result)
11930            : "w"(a)
11931            : /* No clobbers */);
11932   return result;
/* vpmin family: pairwise minimum — the exact mirror of the vpmax family
   above.  FMINP for floats, SMINP/UMINP for signed/unsigned integers;
   vpminnm* use FMINNMP (IEEE 754-2008 minNum, preferring a number over a
   quiet NaN); the *qd/*s forms reduce a whole vector to a scalar.  */

11935 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
11936 vpmin_f32 (float32x2_t a, float32x2_t b)
11938   float32x2_t result;
11939   __asm__ ("fminp %0.2s, %1.2s, %2.2s"
11940            : "=w"(result)
11941            : "w"(a), "w"(b)
11942            : /* No clobbers */);
11943   return result;
11946 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11947 vpmin_s8 (int8x8_t a, int8x8_t b)
11949   int8x8_t result;
11950   __asm__ ("sminp %0.8b, %1.8b, %2.8b"
11951            : "=w"(result)
11952            : "w"(a), "w"(b)
11953            : /* No clobbers */);
11954   return result;
11957 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11958 vpmin_s16 (int16x4_t a, int16x4_t b)
11960   int16x4_t result;
11961   __asm__ ("sminp %0.4h, %1.4h, %2.4h"
11962            : "=w"(result)
11963            : "w"(a), "w"(b)
11964            : /* No clobbers */);
11965   return result;
11968 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11969 vpmin_s32 (int32x2_t a, int32x2_t b)
11971   int32x2_t result;
11972   __asm__ ("sminp %0.2s, %1.2s, %2.2s"
11973            : "=w"(result)
11974            : "w"(a), "w"(b)
11975            : /* No clobbers */);
11976   return result;
11979 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11980 vpmin_u8 (uint8x8_t a, uint8x8_t b)
11982   uint8x8_t result;
11983   __asm__ ("uminp %0.8b, %1.8b, %2.8b"
11984            : "=w"(result)
11985            : "w"(a), "w"(b)
11986            : /* No clobbers */);
11987   return result;
11990 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11991 vpmin_u16 (uint16x4_t a, uint16x4_t b)
11993   uint16x4_t result;
11994   __asm__ ("uminp %0.4h, %1.4h, %2.4h"
11995            : "=w"(result)
11996            : "w"(a), "w"(b)
11997            : /* No clobbers */);
11998   return result;
12001 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12002 vpmin_u32 (uint32x2_t a, uint32x2_t b)
12004   uint32x2_t result;
12005   __asm__ ("uminp %0.2s, %1.2s, %2.2s"
12006            : "=w"(result)
12007            : "w"(a), "w"(b)
12008            : /* No clobbers */);
12009   return result;
/* Pairwise minNum (NaN-suppressing) variants: FMINNMP.  */
12012 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12013 vpminnm_f32 (float32x2_t a, float32x2_t b)
12015   float32x2_t result;
12016   __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
12017            : "=w"(result)
12018            : "w"(a), "w"(b)
12019            : /* No clobbers */);
12020   return result;
12023 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12024 vpminnmq_f32 (float32x4_t a, float32x4_t b)
12026   float32x4_t result;
12027   __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
12028            : "=w"(result)
12029            : "w"(a), "w"(b)
12030            : /* No clobbers */);
12031   return result;
12034 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12035 vpminnmq_f64 (float64x2_t a, float64x2_t b)
12037   float64x2_t result;
12038   __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
12039            : "=w"(result)
12040            : "w"(a), "w"(b)
12041            : /* No clobbers */);
12042   return result;
/* Scalar reductions: minNum of the two lanes of a.  */
12045 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12046 vpminnmqd_f64 (float64x2_t a)
12048   float64_t result;
12049   __asm__ ("fminnmp %d0,%1.2d"
12050            : "=w"(result)
12051            : "w"(a)
12052            : /* No clobbers */);
12053   return result;
12056 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12057 vpminnms_f32 (float32x2_t a)
12059   float32_t result;
12060   __asm__ ("fminnmp %s0,%1.2s"
12061            : "=w"(result)
12062            : "w"(a)
12063            : /* No clobbers */);
12064   return result;
/* 128-bit (quad-register) pairwise-minimum variants.  */
12067 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12068 vpminq_f32 (float32x4_t a, float32x4_t b)
12070   float32x4_t result;
12071   __asm__ ("fminp %0.4s, %1.4s, %2.4s"
12072            : "=w"(result)
12073            : "w"(a), "w"(b)
12074            : /* No clobbers */);
12075   return result;
12078 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
12079 vpminq_f64 (float64x2_t a, float64x2_t b)
12081   float64x2_t result;
12082   __asm__ ("fminp %0.2d, %1.2d, %2.2d"
12083            : "=w"(result)
12084            : "w"(a), "w"(b)
12085            : /* No clobbers */);
12086   return result;
12089 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12090 vpminq_s8 (int8x16_t a, int8x16_t b)
12092   int8x16_t result;
12093   __asm__ ("sminp %0.16b, %1.16b, %2.16b"
12094            : "=w"(result)
12095            : "w"(a), "w"(b)
12096            : /* No clobbers */);
12097   return result;
12100 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12101 vpminq_s16 (int16x8_t a, int16x8_t b)
12103   int16x8_t result;
12104   __asm__ ("sminp %0.8h, %1.8h, %2.8h"
12105            : "=w"(result)
12106            : "w"(a), "w"(b)
12107            : /* No clobbers */);
12108   return result;
12111 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12112 vpminq_s32 (int32x4_t a, int32x4_t b)
12114   int32x4_t result;
12115   __asm__ ("sminp %0.4s, %1.4s, %2.4s"
12116            : "=w"(result)
12117            : "w"(a), "w"(b)
12118            : /* No clobbers */);
12119   return result;
12122 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12123 vpminq_u8 (uint8x16_t a, uint8x16_t b)
12125   uint8x16_t result;
12126   __asm__ ("uminp %0.16b, %1.16b, %2.16b"
12127            : "=w"(result)
12128            : "w"(a), "w"(b)
12129            : /* No clobbers */);
12130   return result;
12133 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12134 vpminq_u16 (uint16x8_t a, uint16x8_t b)
12136   uint16x8_t result;
12137   __asm__ ("uminp %0.8h, %1.8h, %2.8h"
12138            : "=w"(result)
12139            : "w"(a), "w"(b)
12140            : /* No clobbers */);
12141   return result;
12144 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12145 vpminq_u32 (uint32x4_t a, uint32x4_t b)
12147   uint32x4_t result;
12148   __asm__ ("uminp %0.4s, %1.4s, %2.4s"
12149            : "=w"(result)
12150            : "w"(a), "w"(b)
12151            : /* No clobbers */);
12152   return result;
/* Scalar reductions: plain pairwise minimum of the two lanes of a.  */
12155 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
12156 vpminqd_f64 (float64x2_t a)
12158   float64_t result;
12159   __asm__ ("fminp %d0,%1.2d"
12160            : "=w"(result)
12161            : "w"(a)
12162            : /* No clobbers */);
12163   return result;
12166 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
12167 vpmins_f32 (float32x2_t a)
12169   float32_t result;
12170   __asm__ ("fminp %s0,%1.2s"
12171            : "=w"(result)
12172            : "w"(a)
12173            : /* No clobbers */);
12174   return result;
/* vqdmulh_n family: saturating doubling multiply, returning the high
   half (SQDMULH), with the scalar b broadcast via the by-element form
   "%2.h[0]" / "%2.s[0]".  The scalar is passed in a SIMD register ("w"
   constraint) so lane 0 of that register holds b.  */

12177 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12178 vqdmulh_n_s16 (int16x4_t a, int16_t b)
12180   int16x4_t result;
12181   __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
12182            : "=w"(result)
12183            : "w"(a), "w"(b)
12184            : /* No clobbers */);
12185   return result;
12188 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12189 vqdmulh_n_s32 (int32x2_t a, int32_t b)
12191   int32x2_t result;
12192   __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
12193            : "=w"(result)
12194            : "w"(a), "w"(b)
12195            : /* No clobbers */);
12196   return result;
12199 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12200 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
12202   int16x8_t result;
12203   __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
12204            : "=w"(result)
12205            : "w"(a), "w"(b)
12206            : /* No clobbers */);
12207   return result;
12210 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12211 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
12213   int32x4_t result;
12214   __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
12215            : "=w"(result)
12216            : "w"(a), "w"(b)
12217            : /* No clobbers */);
12218   return result;
/* vqmovn_high / vqmovun_high family: saturating extract-narrow into the
   upper half of a vector.  result is pre-seeded with a in its lower half
   (vcombine with a zero upper half), then SQXTN2 / UQXTN2 / SQXTUN2
   narrows b into the upper half.  The "+w" constraint makes result a
   read-write operand, since the instruction preserves the lower half.
   vqmovun_* is the signed-input, unsigned-saturating form (SQXTUN2).  */

12221 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12222 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
12224   int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
12225   __asm__ ("sqxtn2 %0.16b, %1.8h"
12226            : "+w"(result)
12227            : "w"(b)
12228            : /* No clobbers */);
12229   return result;
12232 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12233 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
12235   int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
12236   __asm__ ("sqxtn2 %0.8h, %1.4s"
12237            : "+w"(result)
12238            : "w"(b)
12239            : /* No clobbers */);
12240   return result;
12243 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12244 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
12246   int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
12247   __asm__ ("sqxtn2 %0.4s, %1.2d"
12248            : "+w"(result)
12249            : "w"(b)
12250            : /* No clobbers */);
12251   return result;
12254 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12255 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
12257   uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
12258   __asm__ ("uqxtn2 %0.16b, %1.8h"
12259            : "+w"(result)
12260            : "w"(b)
12261            : /* No clobbers */);
12262   return result;
12265 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12266 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
12268   uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
12269   __asm__ ("uqxtn2 %0.8h, %1.4s"
12270            : "+w"(result)
12271            : "w"(b)
12272            : /* No clobbers */);
12273   return result;
12276 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12277 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
12279   uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
12280   __asm__ ("uqxtn2 %0.4s, %1.2d"
12281            : "+w"(result)
12282            : "w"(b)
12283            : /* No clobbers */);
12284   return result;
/* Signed-to-unsigned saturating narrow into the upper half (SQXTUN2).  */
12287 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12288 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
12290   uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
12291   __asm__ ("sqxtun2 %0.16b, %1.8h"
12292            : "+w"(result)
12293            : "w"(b)
12294            : /* No clobbers */);
12295   return result;
12298 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12299 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
12301   uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
12302   __asm__ ("sqxtun2 %0.8h, %1.4s"
12303            : "+w"(result)
12304            : "w"(b)
12305            : /* No clobbers */);
12306   return result;
12309 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12310 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
12312   uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
12313   __asm__ ("sqxtun2 %0.4s, %1.2d"
12314            : "+w"(result)
12315            : "w"(b)
12316            : /* No clobbers */);
12317   return result;
12320 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12321 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
12323 int16x4_t result;
12324 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
12325 : "=w"(result)
12326 : "w"(a), "w"(b)
12327 : /* No clobbers */);
12328 return result;
12331 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12332 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
12334 int32x2_t result;
12335 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
12336 : "=w"(result)
12337 : "w"(a), "w"(b)
12338 : /* No clobbers */);
12339 return result;
12342 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12343 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
12345 int16x8_t result;
12346 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
12347 : "=w"(result)
12348 : "w"(a), "w"(b)
12349 : /* No clobbers */);
12350 return result;
12353 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12354 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
12356 int32x4_t result;
12357 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
12358 : "=w"(result)
12359 : "w"(a), "w"(b)
12360 : /* No clobbers */);
12361 return result;
/* Saturating rounding shift right and narrow into the high half
   (SQRSHRN2 / UQRSHRN2).  Macros rather than functions because the
   shift amount C must be an assembly-time immediate ("i" constraint).  */
#define vqrshrn_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrn_high_n_u64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Saturating rounding shift right, narrow signed to unsigned, into the
   high half (SQRSHRUN2).  C must be a compile-time immediate.  */
#define vqrshrun_high_n_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2"                          \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqrshrun_high_n_s64(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Saturating (non-rounding) shift right and narrow into the high half
   (SQSHRN2 / UQSHRN2).  C must be a compile-time immediate.  */
#define vqshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2"                             \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Saturating shift right, narrow signed to unsigned, into the high half
   (SQSHRUN2).  C must be a compile-time immediate.  */
#define vqshrun_high_n_s16(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2"                           \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s32(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vqshrun_high_n_s64(a, b, c)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2"                            \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
12616 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12617 vrbit_s8 (int8x8_t a)
12619 int8x8_t result;
12620 __asm__ ("rbit %0.8b,%1.8b"
12621 : "=w"(result)
12622 : "w"(a)
12623 : /* No clobbers */);
12624 return result;
12627 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12628 vrbit_u8 (uint8x8_t a)
12630 uint8x8_t result;
12631 __asm__ ("rbit %0.8b,%1.8b"
12632 : "=w"(result)
12633 : "w"(a)
12634 : /* No clobbers */);
12635 return result;
12638 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12639 vrbitq_s8 (int8x16_t a)
12641 int8x16_t result;
12642 __asm__ ("rbit %0.16b,%1.16b"
12643 : "=w"(result)
12644 : "w"(a)
12645 : /* No clobbers */);
12646 return result;
12649 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12650 vrbitq_u8 (uint8x16_t a)
12652 uint8x16_t result;
12653 __asm__ ("rbit %0.16b,%1.16b"
12654 : "=w"(result)
12655 : "w"(a)
12656 : /* No clobbers */);
12657 return result;
12660 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12661 vrecpe_u32 (uint32x2_t a)
12663 uint32x2_t result;
12664 __asm__ ("urecpe %0.2s,%1.2s"
12665 : "=w"(result)
12666 : "w"(a)
12667 : /* No clobbers */);
12668 return result;
12671 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
12672 vrecpeq_u32 (uint32x4_t a)
12674 uint32x4_t result;
12675 __asm__ ("urecpe %0.4s,%1.4s"
12676 : "=w"(result)
12677 : "w"(a)
12678 : /* No clobbers */);
12679 return result;
12682 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12683 vrev16_p8 (poly8x8_t a)
12685 poly8x8_t result;
12686 __asm__ ("rev16 %0.8b,%1.8b"
12687 : "=w"(result)
12688 : "w"(a)
12689 : /* No clobbers */);
12690 return result;
12693 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12694 vrev16_s8 (int8x8_t a)
12696 int8x8_t result;
12697 __asm__ ("rev16 %0.8b,%1.8b"
12698 : "=w"(result)
12699 : "w"(a)
12700 : /* No clobbers */);
12701 return result;
12704 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12705 vrev16_u8 (uint8x8_t a)
12707 uint8x8_t result;
12708 __asm__ ("rev16 %0.8b,%1.8b"
12709 : "=w"(result)
12710 : "w"(a)
12711 : /* No clobbers */);
12712 return result;
12715 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12716 vrev16q_p8 (poly8x16_t a)
12718 poly8x16_t result;
12719 __asm__ ("rev16 %0.16b,%1.16b"
12720 : "=w"(result)
12721 : "w"(a)
12722 : /* No clobbers */);
12723 return result;
12726 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12727 vrev16q_s8 (int8x16_t a)
12729 int8x16_t result;
12730 __asm__ ("rev16 %0.16b,%1.16b"
12731 : "=w"(result)
12732 : "w"(a)
12733 : /* No clobbers */);
12734 return result;
12737 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12738 vrev16q_u8 (uint8x16_t a)
12740 uint8x16_t result;
12741 __asm__ ("rev16 %0.16b,%1.16b"
12742 : "=w"(result)
12743 : "w"(a)
12744 : /* No clobbers */);
12745 return result;
12748 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12749 vrev32_p8 (poly8x8_t a)
12751 poly8x8_t result;
12752 __asm__ ("rev32 %0.8b,%1.8b"
12753 : "=w"(result)
12754 : "w"(a)
12755 : /* No clobbers */);
12756 return result;
12759 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12760 vrev32_p16 (poly16x4_t a)
12762 poly16x4_t result;
12763 __asm__ ("rev32 %0.4h,%1.4h"
12764 : "=w"(result)
12765 : "w"(a)
12766 : /* No clobbers */);
12767 return result;
12770 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12771 vrev32_s8 (int8x8_t a)
12773 int8x8_t result;
12774 __asm__ ("rev32 %0.8b,%1.8b"
12775 : "=w"(result)
12776 : "w"(a)
12777 : /* No clobbers */);
12778 return result;
12781 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12782 vrev32_s16 (int16x4_t a)
12784 int16x4_t result;
12785 __asm__ ("rev32 %0.4h,%1.4h"
12786 : "=w"(result)
12787 : "w"(a)
12788 : /* No clobbers */);
12789 return result;
12792 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12793 vrev32_u8 (uint8x8_t a)
12795 uint8x8_t result;
12796 __asm__ ("rev32 %0.8b,%1.8b"
12797 : "=w"(result)
12798 : "w"(a)
12799 : /* No clobbers */);
12800 return result;
12803 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12804 vrev32_u16 (uint16x4_t a)
12806 uint16x4_t result;
12807 __asm__ ("rev32 %0.4h,%1.4h"
12808 : "=w"(result)
12809 : "w"(a)
12810 : /* No clobbers */);
12811 return result;
12814 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12815 vrev32q_p8 (poly8x16_t a)
12817 poly8x16_t result;
12818 __asm__ ("rev32 %0.16b,%1.16b"
12819 : "=w"(result)
12820 : "w"(a)
12821 : /* No clobbers */);
12822 return result;
12825 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
12826 vrev32q_p16 (poly16x8_t a)
12828 poly16x8_t result;
12829 __asm__ ("rev32 %0.8h,%1.8h"
12830 : "=w"(result)
12831 : "w"(a)
12832 : /* No clobbers */);
12833 return result;
12836 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12837 vrev32q_s8 (int8x16_t a)
12839 int8x16_t result;
12840 __asm__ ("rev32 %0.16b,%1.16b"
12841 : "=w"(result)
12842 : "w"(a)
12843 : /* No clobbers */);
12844 return result;
12847 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12848 vrev32q_s16 (int16x8_t a)
12850 int16x8_t result;
12851 __asm__ ("rev32 %0.8h,%1.8h"
12852 : "=w"(result)
12853 : "w"(a)
12854 : /* No clobbers */);
12855 return result;
12858 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12859 vrev32q_u8 (uint8x16_t a)
12861 uint8x16_t result;
12862 __asm__ ("rev32 %0.16b,%1.16b"
12863 : "=w"(result)
12864 : "w"(a)
12865 : /* No clobbers */);
12866 return result;
12869 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
12870 vrev32q_u16 (uint16x8_t a)
12872 uint16x8_t result;
12873 __asm__ ("rev32 %0.8h,%1.8h"
12874 : "=w"(result)
12875 : "w"(a)
12876 : /* No clobbers */);
12877 return result;
12880 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
12881 vrev64_f32 (float32x2_t a)
12883 float32x2_t result;
12884 __asm__ ("rev64 %0.2s,%1.2s"
12885 : "=w"(result)
12886 : "w"(a)
12887 : /* No clobbers */);
12888 return result;
12891 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12892 vrev64_p8 (poly8x8_t a)
12894 poly8x8_t result;
12895 __asm__ ("rev64 %0.8b,%1.8b"
12896 : "=w"(result)
12897 : "w"(a)
12898 : /* No clobbers */);
12899 return result;
12902 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
12903 vrev64_p16 (poly16x4_t a)
12905 poly16x4_t result;
12906 __asm__ ("rev64 %0.4h,%1.4h"
12907 : "=w"(result)
12908 : "w"(a)
12909 : /* No clobbers */);
12910 return result;
12913 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12914 vrev64_s8 (int8x8_t a)
12916 int8x8_t result;
12917 __asm__ ("rev64 %0.8b,%1.8b"
12918 : "=w"(result)
12919 : "w"(a)
12920 : /* No clobbers */);
12921 return result;
12924 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12925 vrev64_s16 (int16x4_t a)
12927 int16x4_t result;
12928 __asm__ ("rev64 %0.4h,%1.4h"
12929 : "=w"(result)
12930 : "w"(a)
12931 : /* No clobbers */);
12932 return result;
12935 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12936 vrev64_s32 (int32x2_t a)
12938 int32x2_t result;
12939 __asm__ ("rev64 %0.2s,%1.2s"
12940 : "=w"(result)
12941 : "w"(a)
12942 : /* No clobbers */);
12943 return result;
12946 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12947 vrev64_u8 (uint8x8_t a)
12949 uint8x8_t result;
12950 __asm__ ("rev64 %0.8b,%1.8b"
12951 : "=w"(result)
12952 : "w"(a)
12953 : /* No clobbers */);
12954 return result;
12957 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
12958 vrev64_u16 (uint16x4_t a)
12960 uint16x4_t result;
12961 __asm__ ("rev64 %0.4h,%1.4h"
12962 : "=w"(result)
12963 : "w"(a)
12964 : /* No clobbers */);
12965 return result;
12968 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
12969 vrev64_u32 (uint32x2_t a)
12971 uint32x2_t result;
12972 __asm__ ("rev64 %0.2s,%1.2s"
12973 : "=w"(result)
12974 : "w"(a)
12975 : /* No clobbers */);
12976 return result;
12979 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
12980 vrev64q_f32 (float32x4_t a)
12982 float32x4_t result;
12983 __asm__ ("rev64 %0.4s,%1.4s"
12984 : "=w"(result)
12985 : "w"(a)
12986 : /* No clobbers */);
12987 return result;
12990 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12991 vrev64q_p8 (poly8x16_t a)
12993 poly8x16_t result;
12994 __asm__ ("rev64 %0.16b,%1.16b"
12995 : "=w"(result)
12996 : "w"(a)
12997 : /* No clobbers */);
12998 return result;
13001 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13002 vrev64q_p16 (poly16x8_t a)
13004 poly16x8_t result;
13005 __asm__ ("rev64 %0.8h,%1.8h"
13006 : "=w"(result)
13007 : "w"(a)
13008 : /* No clobbers */);
13009 return result;
13012 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13013 vrev64q_s8 (int8x16_t a)
13015 int8x16_t result;
13016 __asm__ ("rev64 %0.16b,%1.16b"
13017 : "=w"(result)
13018 : "w"(a)
13019 : /* No clobbers */);
13020 return result;
13023 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13024 vrev64q_s16 (int16x8_t a)
13026 int16x8_t result;
13027 __asm__ ("rev64 %0.8h,%1.8h"
13028 : "=w"(result)
13029 : "w"(a)
13030 : /* No clobbers */);
13031 return result;
13034 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13035 vrev64q_s32 (int32x4_t a)
13037 int32x4_t result;
13038 __asm__ ("rev64 %0.4s,%1.4s"
13039 : "=w"(result)
13040 : "w"(a)
13041 : /* No clobbers */);
13042 return result;
13045 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13046 vrev64q_u8 (uint8x16_t a)
13048 uint8x16_t result;
13049 __asm__ ("rev64 %0.16b,%1.16b"
13050 : "=w"(result)
13051 : "w"(a)
13052 : /* No clobbers */);
13053 return result;
13056 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13057 vrev64q_u16 (uint16x8_t a)
13059 uint16x8_t result;
13060 __asm__ ("rev64 %0.8h,%1.8h"
13061 : "=w"(result)
13062 : "w"(a)
13063 : /* No clobbers */);
13064 return result;
13067 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13068 vrev64q_u32 (uint32x4_t a)
13070 uint32x4_t result;
13071 __asm__ ("rev64 %0.4s,%1.4s"
13072 : "=w"(result)
13073 : "w"(a)
13074 : /* No clobbers */);
13075 return result;
/* Rounding shift right and narrow into the high half (RSHRN2).
   C must be a compile-time immediate ("i" constraint).  */
#define vrshrn_high_n_s16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_s64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u16(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2"                               \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u32(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_high_n_u64(a, b, c)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Rounding shift right and narrow (RSHRN).  B must be a compile-time
   immediate ("i" constraint).  */
#define vrshrn_n_s16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_s64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u16(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("rshrn %0.8b,%1.8h,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u32(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                                \
       __asm__ ("rshrn %0.4h,%1.4s,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vrshrn_n_u64(a, b)                                              \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                                \
       __asm__ ("rshrn %0.2s,%1.2d,%2"                                  \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
13234 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13235 vrsqrte_f32 (float32x2_t a)
13237 float32x2_t result;
13238 __asm__ ("frsqrte %0.2s,%1.2s"
13239 : "=w"(result)
13240 : "w"(a)
13241 : /* No clobbers */);
13242 return result;
13245 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13246 vrsqrte_f64 (float64x2_t a)
13248 float64x2_t result;
13249 __asm__ ("frsqrte %0.2d,%1.2d"
13250 : "=w"(result)
13251 : "w"(a)
13252 : /* No clobbers */);
13253 return result;
13256 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13257 vrsqrte_u32 (uint32x2_t a)
13259 uint32x2_t result;
13260 __asm__ ("ursqrte %0.2s,%1.2s"
13261 : "=w"(result)
13262 : "w"(a)
13263 : /* No clobbers */);
13264 return result;
13267 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13268 vrsqrted_f64 (float64_t a)
13270 float64_t result;
13271 __asm__ ("frsqrte %d0,%d1"
13272 : "=w"(result)
13273 : "w"(a)
13274 : /* No clobbers */);
13275 return result;
13278 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13279 vrsqrteq_f32 (float32x4_t a)
13281 float32x4_t result;
13282 __asm__ ("frsqrte %0.4s,%1.4s"
13283 : "=w"(result)
13284 : "w"(a)
13285 : /* No clobbers */);
13286 return result;
13289 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13290 vrsqrteq_f64 (float64x2_t a)
13292 float64x2_t result;
13293 __asm__ ("frsqrte %0.2d,%1.2d"
13294 : "=w"(result)
13295 : "w"(a)
13296 : /* No clobbers */);
13297 return result;
13300 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13301 vrsqrteq_u32 (uint32x4_t a)
13303 uint32x4_t result;
13304 __asm__ ("ursqrte %0.4s,%1.4s"
13305 : "=w"(result)
13306 : "w"(a)
13307 : /* No clobbers */);
13308 return result;
13311 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13312 vrsqrtes_f32 (float32_t a)
13314 float32_t result;
13315 __asm__ ("frsqrte %s0,%s1"
13316 : "=w"(result)
13317 : "w"(a)
13318 : /* No clobbers */);
13319 return result;
13322 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13323 vrsqrts_f32 (float32x2_t a, float32x2_t b)
13325 float32x2_t result;
13326 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
13327 : "=w"(result)
13328 : "w"(a), "w"(b)
13329 : /* No clobbers */);
13330 return result;
13333 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13334 vrsqrtsd_f64 (float64_t a, float64_t b)
13336 float64_t result;
13337 __asm__ ("frsqrts %d0,%d1,%d2"
13338 : "=w"(result)
13339 : "w"(a), "w"(b)
13340 : /* No clobbers */);
13341 return result;
13344 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13345 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
13347 float32x4_t result;
13348 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
13349 : "=w"(result)
13350 : "w"(a), "w"(b)
13351 : /* No clobbers */);
13352 return result;
13355 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13356 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
13358 float64x2_t result;
13359 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
13360 : "=w"(result)
13361 : "w"(a), "w"(b)
13362 : /* No clobbers */);
13363 return result;
13366 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13367 vrsqrtss_f32 (float32_t a, float32_t b)
13369 float32_t result;
13370 __asm__ ("frsqrts %s0,%s1,%s2"
13371 : "=w"(result)
13372 : "w"(a), "w"(b)
13373 : /* No clobbers */);
13374 return result;
13377 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13378 vrsrtsq_f64 (float64x2_t a, float64x2_t b)
13380 float64x2_t result;
13381 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
13382 : "=w"(result)
13383 : "w"(a), "w"(b)
13384 : /* No clobbers */);
13385 return result;
13388 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13389 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
13391 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
13392 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
13393 : "+w"(result)
13394 : "w"(b), "w"(c)
13395 : /* No clobbers */);
13396 return result;
13399 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13400 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
13402 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
13403 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
13404 : "+w"(result)
13405 : "w"(b), "w"(c)
13406 : /* No clobbers */);
13407 return result;
13410 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13411 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
13413 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
13414 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
13415 : "+w"(result)
13416 : "w"(b), "w"(c)
13417 : /* No clobbers */);
13418 return result;
13421 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13422 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
13424 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
13425 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
13426 : "+w"(result)
13427 : "w"(b), "w"(c)
13428 : /* No clobbers */);
13429 return result;
13432 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13433 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
13435 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
13436 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
13437 : "+w"(result)
13438 : "w"(b), "w"(c)
13439 : /* No clobbers */);
13440 return result;
13443 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13444 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
13446 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
13447 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
13448 : "+w"(result)
13449 : "w"(b), "w"(c)
13450 : /* No clobbers */);
13451 return result;
13454 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13455 vrsubhn_s16 (int16x8_t a, int16x8_t b)
13457 int8x8_t result;
13458 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
13459 : "=w"(result)
13460 : "w"(a), "w"(b)
13461 : /* No clobbers */);
13462 return result;
13465 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13466 vrsubhn_s32 (int32x4_t a, int32x4_t b)
13468 int16x4_t result;
13469 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
13470 : "=w"(result)
13471 : "w"(a), "w"(b)
13472 : /* No clobbers */);
13473 return result;
13476 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13477 vrsubhn_s64 (int64x2_t a, int64x2_t b)
13479 int32x2_t result;
13480 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
13481 : "=w"(result)
13482 : "w"(a), "w"(b)
13483 : /* No clobbers */);
13484 return result;
13487 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13488 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
13490 uint8x8_t result;
13491 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
13492 : "=w"(result)
13493 : "w"(a), "w"(b)
13494 : /* No clobbers */);
13495 return result;
13498 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13499 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
13501 uint16x4_t result;
13502 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
13503 : "=w"(result)
13504 : "w"(a), "w"(b)
13505 : /* No clobbers */);
13506 return result;
13509 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13510 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
13512 uint32x2_t result;
13513 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
13514 : "=w"(result)
13515 : "w"(a), "w"(b)
13516 : /* No clobbers */);
13517 return result;
/* Insert scalar A into lane C of vector B (INS, by-element from a
   general register).  Macros because the lane index C must be an
   assembly-time immediate; "0"(b_) ties the output to the original
   vector so the other lanes are preserved.  */
#define vset_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x2_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x1_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t a_ = (a);                                                \
       poly8x8_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x4_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t a_ = (a);                                                 \
       int8x8_t result;                                                 \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x4_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x2_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vset_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x1_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
13624 #define vset_lane_u8(a, b, c) \
13625 __extension__ \
13626 ({ \
13627 uint8x8_t b_ = (b); \
13628 uint8_t a_ = (a); \
13629 uint8x8_t result; \
13630 __asm__ ("ins %0.b[%3], %w1" \
13631 : "=w"(result) \
13632 : "r"(a_), "0"(b_), "i"(c) \
13633 : /* No clobbers */); \
13634 result; \
13637 #define vset_lane_u16(a, b, c) \
13638 __extension__ \
13639 ({ \
13640 uint16x4_t b_ = (b); \
13641 uint16_t a_ = (a); \
13642 uint16x4_t result; \
13643 __asm__ ("ins %0.h[%3], %w1" \
13644 : "=w"(result) \
13645 : "r"(a_), "0"(b_), "i"(c) \
13646 : /* No clobbers */); \
13647 result; \
13650 #define vset_lane_u32(a, b, c) \
13651 __extension__ \
13652 ({ \
13653 uint32x2_t b_ = (b); \
13654 uint32_t a_ = (a); \
13655 uint32x2_t result; \
13656 __asm__ ("ins %0.s[%3], %w1" \
13657 : "=w"(result) \
13658 : "r"(a_), "0"(b_), "i"(c) \
13659 : /* No clobbers */); \
13660 result; \
13663 #define vset_lane_u64(a, b, c) \
13664 __extension__ \
13665 ({ \
13666 uint64x1_t b_ = (b); \
13667 uint64_t a_ = (a); \
13668 uint64x1_t result; \
13669 __asm__ ("ins %0.d[%3], %x1" \
13670 : "=w"(result) \
13671 : "r"(a_), "0"(b_), "i"(c) \
13672 : /* No clobbers */); \
13673 result; \
/* vsetq_lane family: insert scalar A into lane C of 128-bit vector B via
   INS.  Same shape as vset_lane, but operating on the quad-register types.  */
#define vsetq_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t a_ = (a);                                              \
       float32x4_t result;                                              \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t a_ = (a);                                              \
       float64x2_t result;                                              \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t a_ = (a);                                                \
       poly8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t a_ = (a);                                               \
       poly16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t a_ = (a);                                                 \
       int8x16_t result;                                                \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t a_ = (a);                                                \
       int16x8_t result;                                                \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t a_ = (a);                                                \
       int32x4_t result;                                                \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t a_ = (a);                                                \
       int64x2_t result;                                                \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t a_ = (a);                                                \
       uint8x16_t result;                                               \
       __asm__ ("ins %0.b[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t a_ = (a);                                               \
       uint16x8_t result;                                               \
       __asm__ ("ins %0.h[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t a_ = (a);                                               \
       uint32x4_t result;                                               \
       __asm__ ("ins %0.s[%3], %w1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsetq_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t a_ = (a);                                               \
       uint64x2_t result;                                               \
       __asm__ ("ins %0.d[%3], %x1"                                     \
                : "=w"(result)                                          \
                : "r"(a_), "0"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vshrn_high_n family: shift each wide element of B right by immediate C,
   narrow, and write the results into the high half of the result whose low
   half is A.  The low half is seeded via vcombine before the asm, and the
   "+w" read/write constraint lets SHRN2 fill the upper half in place.  */
#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8 (UINT64_C (0x0)));          \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8 (UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16 (UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32 (UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vshrn_n family: shift each wide element of A right by immediate B and
   narrow to the half-width result (SHRN).  Macro form because B must be a
   compile-time immediate.  */
#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsli_n family (poly types): shift B left by immediate C and insert into A,
   leaving the low C bits of each destination element unchanged (SLI).  A is
   tied to the output via the "0" constraint.  */
#define vsli_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sli %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsli_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sli %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sli %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sli %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsri_n family (poly types): shift B right by immediate C and insert into A,
   leaving the high C bits of each destination element unchanged (SRI).  */
#define vsri_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sri %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sri %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sri %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sri %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vst1_lane family: store lane C of 64-bit vector B to the address A.
   No output operands; the "memory" clobber tells the compiler the store
   happened.  C must be a compile-time immediate, hence the macro form.  */
#define vst1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
/* vst1q_lane family: store lane C of 128-bit vector B to the address A.
   Same shape as vst1_lane, operating on the quad-register types.  */
#define vst1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
14357 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14358 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
14360 int8x16_t result = vcombine_s8 (a, vcreate_s8 (UINT64_C (0x0)));
14361 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
14362 : "+w"(result)
14363 : "w"(b), "w"(c)
14364 : /* No clobbers */);
14365 return result;
14368 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14369 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
14371 int16x8_t result = vcombine_s16 (a, vcreate_s16 (UINT64_C (0x0)));
14372 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
14373 : "+w"(result)
14374 : "w"(b), "w"(c)
14375 : /* No clobbers */);
14376 return result;
14379 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14380 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
14382 int32x4_t result = vcombine_s32 (a, vcreate_s32 (UINT64_C (0x0)));
14383 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
14384 : "+w"(result)
14385 : "w"(b), "w"(c)
14386 : /* No clobbers */);
14387 return result;
14390 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14391 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
14393 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (UINT64_C (0x0)));
14394 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
14395 : "+w"(result)
14396 : "w"(b), "w"(c)
14397 : /* No clobbers */);
14398 return result;
14401 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14402 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
14404 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (UINT64_C (0x0)));
14405 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
14406 : "+w"(result)
14407 : "w"(b), "w"(c)
14408 : /* No clobbers */);
14409 return result;
14412 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14413 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
14415 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (UINT64_C (0x0)));
14416 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
14417 : "+w"(result)
14418 : "w"(b), "w"(c)
14419 : /* No clobbers */);
14420 return result;
14423 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14424 vsubhn_s16 (int16x8_t a, int16x8_t b)
14426 int8x8_t result;
14427 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
14428 : "=w"(result)
14429 : "w"(a), "w"(b)
14430 : /* No clobbers */);
14431 return result;
14434 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14435 vsubhn_s32 (int32x4_t a, int32x4_t b)
14437 int16x4_t result;
14438 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
14439 : "=w"(result)
14440 : "w"(a), "w"(b)
14441 : /* No clobbers */);
14442 return result;
14445 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14446 vsubhn_s64 (int64x2_t a, int64x2_t b)
14448 int32x2_t result;
14449 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
14450 : "=w"(result)
14451 : "w"(a), "w"(b)
14452 : /* No clobbers */);
14453 return result;
14456 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14457 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
14459 uint8x8_t result;
14460 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
14461 : "=w"(result)
14462 : "w"(a), "w"(b)
14463 : /* No clobbers */);
14464 return result;
14467 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14468 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
14470 uint16x4_t result;
14471 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
14472 : "=w"(result)
14473 : "w"(a), "w"(b)
14474 : /* No clobbers */);
14475 return result;
14478 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14479 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
14481 uint32x2_t result;
14482 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
14483 : "=w"(result)
14484 : "w"(a), "w"(b)
14485 : /* No clobbers */);
14486 return result;
14489 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14490 vtrn1_f32 (float32x2_t a, float32x2_t b)
14492 float32x2_t result;
14493 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14494 : "=w"(result)
14495 : "w"(a), "w"(b)
14496 : /* No clobbers */);
14497 return result;
14500 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14501 vtrn1_p8 (poly8x8_t a, poly8x8_t b)
14503 poly8x8_t result;
14504 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14505 : "=w"(result)
14506 : "w"(a), "w"(b)
14507 : /* No clobbers */);
14508 return result;
14511 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14512 vtrn1_p16 (poly16x4_t a, poly16x4_t b)
14514 poly16x4_t result;
14515 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14516 : "=w"(result)
14517 : "w"(a), "w"(b)
14518 : /* No clobbers */);
14519 return result;
14522 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14523 vtrn1_s8 (int8x8_t a, int8x8_t b)
14525 int8x8_t result;
14526 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14527 : "=w"(result)
14528 : "w"(a), "w"(b)
14529 : /* No clobbers */);
14530 return result;
14533 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14534 vtrn1_s16 (int16x4_t a, int16x4_t b)
14536 int16x4_t result;
14537 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14538 : "=w"(result)
14539 : "w"(a), "w"(b)
14540 : /* No clobbers */);
14541 return result;
14544 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14545 vtrn1_s32 (int32x2_t a, int32x2_t b)
14547 int32x2_t result;
14548 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14549 : "=w"(result)
14550 : "w"(a), "w"(b)
14551 : /* No clobbers */);
14552 return result;
14555 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14556 vtrn1_u8 (uint8x8_t a, uint8x8_t b)
14558 uint8x8_t result;
14559 __asm__ ("trn1 %0.8b,%1.8b,%2.8b"
14560 : "=w"(result)
14561 : "w"(a), "w"(b)
14562 : /* No clobbers */);
14563 return result;
14566 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14567 vtrn1_u16 (uint16x4_t a, uint16x4_t b)
14569 uint16x4_t result;
14570 __asm__ ("trn1 %0.4h,%1.4h,%2.4h"
14571 : "=w"(result)
14572 : "w"(a), "w"(b)
14573 : /* No clobbers */);
14574 return result;
14577 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14578 vtrn1_u32 (uint32x2_t a, uint32x2_t b)
14580 uint32x2_t result;
14581 __asm__ ("trn1 %0.2s,%1.2s,%2.2s"
14582 : "=w"(result)
14583 : "w"(a), "w"(b)
14584 : /* No clobbers */);
14585 return result;
14588 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14589 vtrn1q_f32 (float32x4_t a, float32x4_t b)
14591 float32x4_t result;
14592 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14593 : "=w"(result)
14594 : "w"(a), "w"(b)
14595 : /* No clobbers */);
14596 return result;
14599 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14600 vtrn1q_f64 (float64x2_t a, float64x2_t b)
14602 float64x2_t result;
14603 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14604 : "=w"(result)
14605 : "w"(a), "w"(b)
14606 : /* No clobbers */);
14607 return result;
14610 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14611 vtrn1q_p8 (poly8x16_t a, poly8x16_t b)
14613 poly8x16_t result;
14614 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14615 : "=w"(result)
14616 : "w"(a), "w"(b)
14617 : /* No clobbers */);
14618 return result;
14621 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14622 vtrn1q_p16 (poly16x8_t a, poly16x8_t b)
14624 poly16x8_t result;
14625 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14626 : "=w"(result)
14627 : "w"(a), "w"(b)
14628 : /* No clobbers */);
14629 return result;
14632 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14633 vtrn1q_s8 (int8x16_t a, int8x16_t b)
14635 int8x16_t result;
14636 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14637 : "=w"(result)
14638 : "w"(a), "w"(b)
14639 : /* No clobbers */);
14640 return result;
14643 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14644 vtrn1q_s16 (int16x8_t a, int16x8_t b)
14646 int16x8_t result;
14647 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14648 : "=w"(result)
14649 : "w"(a), "w"(b)
14650 : /* No clobbers */);
14651 return result;
14654 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14655 vtrn1q_s32 (int32x4_t a, int32x4_t b)
14657 int32x4_t result;
14658 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14659 : "=w"(result)
14660 : "w"(a), "w"(b)
14661 : /* No clobbers */);
14662 return result;
14665 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14666 vtrn1q_s64 (int64x2_t a, int64x2_t b)
14668 int64x2_t result;
14669 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14670 : "=w"(result)
14671 : "w"(a), "w"(b)
14672 : /* No clobbers */);
14673 return result;
14676 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14677 vtrn1q_u8 (uint8x16_t a, uint8x16_t b)
14679 uint8x16_t result;
14680 __asm__ ("trn1 %0.16b,%1.16b,%2.16b"
14681 : "=w"(result)
14682 : "w"(a), "w"(b)
14683 : /* No clobbers */);
14684 return result;
14687 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14688 vtrn1q_u16 (uint16x8_t a, uint16x8_t b)
14690 uint16x8_t result;
14691 __asm__ ("trn1 %0.8h,%1.8h,%2.8h"
14692 : "=w"(result)
14693 : "w"(a), "w"(b)
14694 : /* No clobbers */);
14695 return result;
14698 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14699 vtrn1q_u32 (uint32x4_t a, uint32x4_t b)
14701 uint32x4_t result;
14702 __asm__ ("trn1 %0.4s,%1.4s,%2.4s"
14703 : "=w"(result)
14704 : "w"(a), "w"(b)
14705 : /* No clobbers */);
14706 return result;
14709 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14710 vtrn1q_u64 (uint64x2_t a, uint64x2_t b)
14712 uint64x2_t result;
14713 __asm__ ("trn1 %0.2d,%1.2d,%2.2d"
14714 : "=w"(result)
14715 : "w"(a), "w"(b)
14716 : /* No clobbers */);
14717 return result;
14720 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14721 vtrn2_f32 (float32x2_t a, float32x2_t b)
14723 float32x2_t result;
14724 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14725 : "=w"(result)
14726 : "w"(a), "w"(b)
14727 : /* No clobbers */);
14728 return result;
14731 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
14732 vtrn2_p8 (poly8x8_t a, poly8x8_t b)
14734 poly8x8_t result;
14735 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14736 : "=w"(result)
14737 : "w"(a), "w"(b)
14738 : /* No clobbers */);
14739 return result;
14742 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
14743 vtrn2_p16 (poly16x4_t a, poly16x4_t b)
14745 poly16x4_t result;
14746 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14747 : "=w"(result)
14748 : "w"(a), "w"(b)
14749 : /* No clobbers */);
14750 return result;
14753 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
14754 vtrn2_s8 (int8x8_t a, int8x8_t b)
14756 int8x8_t result;
14757 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14758 : "=w"(result)
14759 : "w"(a), "w"(b)
14760 : /* No clobbers */);
14761 return result;
14764 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
14765 vtrn2_s16 (int16x4_t a, int16x4_t b)
14767 int16x4_t result;
14768 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14769 : "=w"(result)
14770 : "w"(a), "w"(b)
14771 : /* No clobbers */);
14772 return result;
14775 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
14776 vtrn2_s32 (int32x2_t a, int32x2_t b)
14778 int32x2_t result;
14779 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14780 : "=w"(result)
14781 : "w"(a), "w"(b)
14782 : /* No clobbers */);
14783 return result;
14786 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14787 vtrn2_u8 (uint8x8_t a, uint8x8_t b)
14789 uint8x8_t result;
14790 __asm__ ("trn2 %0.8b,%1.8b,%2.8b"
14791 : "=w"(result)
14792 : "w"(a), "w"(b)
14793 : /* No clobbers */);
14794 return result;
14797 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14798 vtrn2_u16 (uint16x4_t a, uint16x4_t b)
14800 uint16x4_t result;
14801 __asm__ ("trn2 %0.4h,%1.4h,%2.4h"
14802 : "=w"(result)
14803 : "w"(a), "w"(b)
14804 : /* No clobbers */);
14805 return result;
14808 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14809 vtrn2_u32 (uint32x2_t a, uint32x2_t b)
14811 uint32x2_t result;
14812 __asm__ ("trn2 %0.2s,%1.2s,%2.2s"
14813 : "=w"(result)
14814 : "w"(a), "w"(b)
14815 : /* No clobbers */);
14816 return result;
14819 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
14820 vtrn2q_f32 (float32x4_t a, float32x4_t b)
14822 float32x4_t result;
14823 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14824 : "=w"(result)
14825 : "w"(a), "w"(b)
14826 : /* No clobbers */);
14827 return result;
14830 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
14831 vtrn2q_f64 (float64x2_t a, float64x2_t b)
14833 float64x2_t result;
14834 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14835 : "=w"(result)
14836 : "w"(a), "w"(b)
14837 : /* No clobbers */);
14838 return result;
14841 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
14842 vtrn2q_p8 (poly8x16_t a, poly8x16_t b)
14844 poly8x16_t result;
14845 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14846 : "=w"(result)
14847 : "w"(a), "w"(b)
14848 : /* No clobbers */);
14849 return result;
14852 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
14853 vtrn2q_p16 (poly16x8_t a, poly16x8_t b)
14855 poly16x8_t result;
14856 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14857 : "=w"(result)
14858 : "w"(a), "w"(b)
14859 : /* No clobbers */);
14860 return result;
14863 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
14864 vtrn2q_s8 (int8x16_t a, int8x16_t b)
14866 int8x16_t result;
14867 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14868 : "=w"(result)
14869 : "w"(a), "w"(b)
14870 : /* No clobbers */);
14871 return result;
14874 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
14875 vtrn2q_s16 (int16x8_t a, int16x8_t b)
14877 int16x8_t result;
14878 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14879 : "=w"(result)
14880 : "w"(a), "w"(b)
14881 : /* No clobbers */);
14882 return result;
14885 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
14886 vtrn2q_s32 (int32x4_t a, int32x4_t b)
14888 int32x4_t result;
14889 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14890 : "=w"(result)
14891 : "w"(a), "w"(b)
14892 : /* No clobbers */);
14893 return result;
14896 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
14897 vtrn2q_s64 (int64x2_t a, int64x2_t b)
14899 int64x2_t result;
14900 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14901 : "=w"(result)
14902 : "w"(a), "w"(b)
14903 : /* No clobbers */);
14904 return result;
14907 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14908 vtrn2q_u8 (uint8x16_t a, uint8x16_t b)
14910 uint8x16_t result;
14911 __asm__ ("trn2 %0.16b,%1.16b,%2.16b"
14912 : "=w"(result)
14913 : "w"(a), "w"(b)
14914 : /* No clobbers */);
14915 return result;
14918 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14919 vtrn2q_u16 (uint16x8_t a, uint16x8_t b)
14921 uint16x8_t result;
14922 __asm__ ("trn2 %0.8h,%1.8h,%2.8h"
14923 : "=w"(result)
14924 : "w"(a), "w"(b)
14925 : /* No clobbers */);
14926 return result;
14929 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14930 vtrn2q_u32 (uint32x4_t a, uint32x4_t b)
14932 uint32x4_t result;
14933 __asm__ ("trn2 %0.4s,%1.4s,%2.4s"
14934 : "=w"(result)
14935 : "w"(a), "w"(b)
14936 : /* No clobbers */);
14937 return result;
14940 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14941 vtrn2q_u64 (uint64x2_t a, uint64x2_t b)
14943 uint64x2_t result;
14944 __asm__ ("trn2 %0.2d,%1.2d,%2.2d"
14945 : "=w"(result)
14946 : "w"(a), "w"(b)
14947 : /* No clobbers */);
14948 return result;
14951 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14952 vtst_p8 (poly8x8_t a, poly8x8_t b)
14954 uint8x8_t result;
14955 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
14956 : "=w"(result)
14957 : "w"(a), "w"(b)
14958 : /* No clobbers */);
14959 return result;
14962 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14963 vtst_p16 (poly16x4_t a, poly16x4_t b)
14965 uint16x4_t result;
14966 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
14967 : "=w"(result)
14968 : "w"(a), "w"(b)
14969 : /* No clobbers */);
14970 return result;
14973 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14974 vtstq_p8 (poly8x16_t a, poly8x16_t b)
14976 uint8x16_t result;
14977 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
14978 : "=w"(result)
14979 : "w"(a), "w"(b)
14980 : /* No clobbers */);
14981 return result;
14984 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14985 vtstq_p16 (poly16x8_t a, poly16x8_t b)
14987 uint16x8_t result;
14988 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
14989 : "=w"(result)
14990 : "w"(a), "w"(b)
14991 : /* No clobbers */);
14992 return result;
14994 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
14995 vuzp1_f32 (float32x2_t a, float32x2_t b)
14997 float32x2_t result;
14998 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
14999 : "=w"(result)
15000 : "w"(a), "w"(b)
15001 : /* No clobbers */);
15002 return result;
15005 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15006 vuzp1_p8 (poly8x8_t a, poly8x8_t b)
15008 poly8x8_t result;
15009 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
15010 : "=w"(result)
15011 : "w"(a), "w"(b)
15012 : /* No clobbers */);
15013 return result;
15016 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15017 vuzp1_p16 (poly16x4_t a, poly16x4_t b)
15019 poly16x4_t result;
15020 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
15021 : "=w"(result)
15022 : "w"(a), "w"(b)
15023 : /* No clobbers */);
15024 return result;
15027 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15028 vuzp1_s8 (int8x8_t a, int8x8_t b)
15030 int8x8_t result;
15031 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
15032 : "=w"(result)
15033 : "w"(a), "w"(b)
15034 : /* No clobbers */);
15035 return result;
15038 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15039 vuzp1_s16 (int16x4_t a, int16x4_t b)
15041 int16x4_t result;
15042 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
15043 : "=w"(result)
15044 : "w"(a), "w"(b)
15045 : /* No clobbers */);
15046 return result;
15049 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15050 vuzp1_s32 (int32x2_t a, int32x2_t b)
15052 int32x2_t result;
15053 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
15054 : "=w"(result)
15055 : "w"(a), "w"(b)
15056 : /* No clobbers */);
15057 return result;
15060 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15061 vuzp1_u8 (uint8x8_t a, uint8x8_t b)
15063 uint8x8_t result;
15064 __asm__ ("uzp1 %0.8b,%1.8b,%2.8b"
15065 : "=w"(result)
15066 : "w"(a), "w"(b)
15067 : /* No clobbers */);
15068 return result;
15071 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15072 vuzp1_u16 (uint16x4_t a, uint16x4_t b)
15074 uint16x4_t result;
15075 __asm__ ("uzp1 %0.4h,%1.4h,%2.4h"
15076 : "=w"(result)
15077 : "w"(a), "w"(b)
15078 : /* No clobbers */);
15079 return result;
15082 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15083 vuzp1_u32 (uint32x2_t a, uint32x2_t b)
15085 uint32x2_t result;
15086 __asm__ ("uzp1 %0.2s,%1.2s,%2.2s"
15087 : "=w"(result)
15088 : "w"(a), "w"(b)
15089 : /* No clobbers */);
15090 return result;
15093 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15094 vuzp1q_f32 (float32x4_t a, float32x4_t b)
15096 float32x4_t result;
15097 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15098 : "=w"(result)
15099 : "w"(a), "w"(b)
15100 : /* No clobbers */);
15101 return result;
15104 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15105 vuzp1q_f64 (float64x2_t a, float64x2_t b)
15107 float64x2_t result;
15108 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15109 : "=w"(result)
15110 : "w"(a), "w"(b)
15111 : /* No clobbers */);
15112 return result;
15115 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15116 vuzp1q_p8 (poly8x16_t a, poly8x16_t b)
15118 poly8x16_t result;
15119 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15120 : "=w"(result)
15121 : "w"(a), "w"(b)
15122 : /* No clobbers */);
15123 return result;
15126 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15127 vuzp1q_p16 (poly16x8_t a, poly16x8_t b)
15129 poly16x8_t result;
15130 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15131 : "=w"(result)
15132 : "w"(a), "w"(b)
15133 : /* No clobbers */);
15134 return result;
15137 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15138 vuzp1q_s8 (int8x16_t a, int8x16_t b)
15140 int8x16_t result;
15141 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15142 : "=w"(result)
15143 : "w"(a), "w"(b)
15144 : /* No clobbers */);
15145 return result;
15148 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15149 vuzp1q_s16 (int16x8_t a, int16x8_t b)
15151 int16x8_t result;
15152 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15153 : "=w"(result)
15154 : "w"(a), "w"(b)
15155 : /* No clobbers */);
15156 return result;
15159 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15160 vuzp1q_s32 (int32x4_t a, int32x4_t b)
15162 int32x4_t result;
15163 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15164 : "=w"(result)
15165 : "w"(a), "w"(b)
15166 : /* No clobbers */);
15167 return result;
15170 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15171 vuzp1q_s64 (int64x2_t a, int64x2_t b)
15173 int64x2_t result;
15174 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15175 : "=w"(result)
15176 : "w"(a), "w"(b)
15177 : /* No clobbers */);
15178 return result;
15181 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15182 vuzp1q_u8 (uint8x16_t a, uint8x16_t b)
15184 uint8x16_t result;
15185 __asm__ ("uzp1 %0.16b,%1.16b,%2.16b"
15186 : "=w"(result)
15187 : "w"(a), "w"(b)
15188 : /* No clobbers */);
15189 return result;
15192 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15193 vuzp1q_u16 (uint16x8_t a, uint16x8_t b)
15195 uint16x8_t result;
15196 __asm__ ("uzp1 %0.8h,%1.8h,%2.8h"
15197 : "=w"(result)
15198 : "w"(a), "w"(b)
15199 : /* No clobbers */);
15200 return result;
15203 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15204 vuzp1q_u32 (uint32x4_t a, uint32x4_t b)
15206 uint32x4_t result;
15207 __asm__ ("uzp1 %0.4s,%1.4s,%2.4s"
15208 : "=w"(result)
15209 : "w"(a), "w"(b)
15210 : /* No clobbers */);
15211 return result;
15214 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15215 vuzp1q_u64 (uint64x2_t a, uint64x2_t b)
15217 uint64x2_t result;
15218 __asm__ ("uzp1 %0.2d,%1.2d,%2.2d"
15219 : "=w"(result)
15220 : "w"(a), "w"(b)
15221 : /* No clobbers */);
15222 return result;
15225 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15226 vuzp2_f32 (float32x2_t a, float32x2_t b)
15228 float32x2_t result;
15229 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15230 : "=w"(result)
15231 : "w"(a), "w"(b)
15232 : /* No clobbers */);
15233 return result;
15236 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15237 vuzp2_p8 (poly8x8_t a, poly8x8_t b)
15239 poly8x8_t result;
15240 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15241 : "=w"(result)
15242 : "w"(a), "w"(b)
15243 : /* No clobbers */);
15244 return result;
15247 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15248 vuzp2_p16 (poly16x4_t a, poly16x4_t b)
15250 poly16x4_t result;
15251 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15252 : "=w"(result)
15253 : "w"(a), "w"(b)
15254 : /* No clobbers */);
15255 return result;
15258 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15259 vuzp2_s8 (int8x8_t a, int8x8_t b)
15261 int8x8_t result;
15262 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15263 : "=w"(result)
15264 : "w"(a), "w"(b)
15265 : /* No clobbers */);
15266 return result;
15269 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15270 vuzp2_s16 (int16x4_t a, int16x4_t b)
15272 int16x4_t result;
15273 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15274 : "=w"(result)
15275 : "w"(a), "w"(b)
15276 : /* No clobbers */);
15277 return result;
15280 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15281 vuzp2_s32 (int32x2_t a, int32x2_t b)
15283 int32x2_t result;
15284 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15285 : "=w"(result)
15286 : "w"(a), "w"(b)
15287 : /* No clobbers */);
15288 return result;
15291 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15292 vuzp2_u8 (uint8x8_t a, uint8x8_t b)
15294 uint8x8_t result;
15295 __asm__ ("uzp2 %0.8b,%1.8b,%2.8b"
15296 : "=w"(result)
15297 : "w"(a), "w"(b)
15298 : /* No clobbers */);
15299 return result;
15302 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15303 vuzp2_u16 (uint16x4_t a, uint16x4_t b)
15305 uint16x4_t result;
15306 __asm__ ("uzp2 %0.4h,%1.4h,%2.4h"
15307 : "=w"(result)
15308 : "w"(a), "w"(b)
15309 : /* No clobbers */);
15310 return result;
15313 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15314 vuzp2_u32 (uint32x2_t a, uint32x2_t b)
15316 uint32x2_t result;
15317 __asm__ ("uzp2 %0.2s,%1.2s,%2.2s"
15318 : "=w"(result)
15319 : "w"(a), "w"(b)
15320 : /* No clobbers */);
15321 return result;
15324 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15325 vuzp2q_f32 (float32x4_t a, float32x4_t b)
15327 float32x4_t result;
15328 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15329 : "=w"(result)
15330 : "w"(a), "w"(b)
15331 : /* No clobbers */);
15332 return result;
15335 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15336 vuzp2q_f64 (float64x2_t a, float64x2_t b)
15338 float64x2_t result;
15339 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15340 : "=w"(result)
15341 : "w"(a), "w"(b)
15342 : /* No clobbers */);
15343 return result;
15346 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15347 vuzp2q_p8 (poly8x16_t a, poly8x16_t b)
15349 poly8x16_t result;
15350 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15351 : "=w"(result)
15352 : "w"(a), "w"(b)
15353 : /* No clobbers */);
15354 return result;
15357 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15358 vuzp2q_p16 (poly16x8_t a, poly16x8_t b)
15360 poly16x8_t result;
15361 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15362 : "=w"(result)
15363 : "w"(a), "w"(b)
15364 : /* No clobbers */);
15365 return result;
15368 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15369 vuzp2q_s8 (int8x16_t a, int8x16_t b)
15371 int8x16_t result;
15372 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15373 : "=w"(result)
15374 : "w"(a), "w"(b)
15375 : /* No clobbers */);
15376 return result;
15379 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15380 vuzp2q_s16 (int16x8_t a, int16x8_t b)
15382 int16x8_t result;
15383 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15384 : "=w"(result)
15385 : "w"(a), "w"(b)
15386 : /* No clobbers */);
15387 return result;
15390 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15391 vuzp2q_s32 (int32x4_t a, int32x4_t b)
15393 int32x4_t result;
15394 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15395 : "=w"(result)
15396 : "w"(a), "w"(b)
15397 : /* No clobbers */);
15398 return result;
15401 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15402 vuzp2q_s64 (int64x2_t a, int64x2_t b)
15404 int64x2_t result;
15405 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15406 : "=w"(result)
15407 : "w"(a), "w"(b)
15408 : /* No clobbers */);
15409 return result;
15412 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15413 vuzp2q_u8 (uint8x16_t a, uint8x16_t b)
15415 uint8x16_t result;
15416 __asm__ ("uzp2 %0.16b,%1.16b,%2.16b"
15417 : "=w"(result)
15418 : "w"(a), "w"(b)
15419 : /* No clobbers */);
15420 return result;
15423 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15424 vuzp2q_u16 (uint16x8_t a, uint16x8_t b)
15426 uint16x8_t result;
15427 __asm__ ("uzp2 %0.8h,%1.8h,%2.8h"
15428 : "=w"(result)
15429 : "w"(a), "w"(b)
15430 : /* No clobbers */);
15431 return result;
15434 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15435 vuzp2q_u32 (uint32x4_t a, uint32x4_t b)
15437 uint32x4_t result;
15438 __asm__ ("uzp2 %0.4s,%1.4s,%2.4s"
15439 : "=w"(result)
15440 : "w"(a), "w"(b)
15441 : /* No clobbers */);
15442 return result;
15445 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15446 vuzp2q_u64 (uint64x2_t a, uint64x2_t b)
15448 uint64x2_t result;
15449 __asm__ ("uzp2 %0.2d,%1.2d,%2.2d"
15450 : "=w"(result)
15451 : "w"(a), "w"(b)
15452 : /* No clobbers */);
15453 return result;
15456 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15457 vzip1_f32 (float32x2_t a, float32x2_t b)
15459 float32x2_t result;
15460 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15461 : "=w"(result)
15462 : "w"(a), "w"(b)
15463 : /* No clobbers */);
15464 return result;
15467 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15468 vzip1_p8 (poly8x8_t a, poly8x8_t b)
15470 poly8x8_t result;
15471 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15472 : "=w"(result)
15473 : "w"(a), "w"(b)
15474 : /* No clobbers */);
15475 return result;
15478 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15479 vzip1_p16 (poly16x4_t a, poly16x4_t b)
15481 poly16x4_t result;
15482 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15483 : "=w"(result)
15484 : "w"(a), "w"(b)
15485 : /* No clobbers */);
15486 return result;
15489 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15490 vzip1_s8 (int8x8_t a, int8x8_t b)
15492 int8x8_t result;
15493 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15494 : "=w"(result)
15495 : "w"(a), "w"(b)
15496 : /* No clobbers */);
15497 return result;
15500 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15501 vzip1_s16 (int16x4_t a, int16x4_t b)
15503 int16x4_t result;
15504 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15505 : "=w"(result)
15506 : "w"(a), "w"(b)
15507 : /* No clobbers */);
15508 return result;
15511 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15512 vzip1_s32 (int32x2_t a, int32x2_t b)
15514 int32x2_t result;
15515 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15516 : "=w"(result)
15517 : "w"(a), "w"(b)
15518 : /* No clobbers */);
15519 return result;
15522 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15523 vzip1_u8 (uint8x8_t a, uint8x8_t b)
15525 uint8x8_t result;
15526 __asm__ ("zip1 %0.8b,%1.8b,%2.8b"
15527 : "=w"(result)
15528 : "w"(a), "w"(b)
15529 : /* No clobbers */);
15530 return result;
15533 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15534 vzip1_u16 (uint16x4_t a, uint16x4_t b)
15536 uint16x4_t result;
15537 __asm__ ("zip1 %0.4h,%1.4h,%2.4h"
15538 : "=w"(result)
15539 : "w"(a), "w"(b)
15540 : /* No clobbers */);
15541 return result;
15544 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15545 vzip1_u32 (uint32x2_t a, uint32x2_t b)
15547 uint32x2_t result;
15548 __asm__ ("zip1 %0.2s,%1.2s,%2.2s"
15549 : "=w"(result)
15550 : "w"(a), "w"(b)
15551 : /* No clobbers */);
15552 return result;
15555 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15556 vzip1q_f32 (float32x4_t a, float32x4_t b)
15558 float32x4_t result;
15559 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15560 : "=w"(result)
15561 : "w"(a), "w"(b)
15562 : /* No clobbers */);
15563 return result;
15566 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15567 vzip1q_f64 (float64x2_t a, float64x2_t b)
15569 float64x2_t result;
15570 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15571 : "=w"(result)
15572 : "w"(a), "w"(b)
15573 : /* No clobbers */);
15574 return result;
15577 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15578 vzip1q_p8 (poly8x16_t a, poly8x16_t b)
15580 poly8x16_t result;
15581 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15582 : "=w"(result)
15583 : "w"(a), "w"(b)
15584 : /* No clobbers */);
15585 return result;
15588 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15589 vzip1q_p16 (poly16x8_t a, poly16x8_t b)
15591 poly16x8_t result;
15592 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15593 : "=w"(result)
15594 : "w"(a), "w"(b)
15595 : /* No clobbers */);
15596 return result;
15599 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15600 vzip1q_s8 (int8x16_t a, int8x16_t b)
15602 int8x16_t result;
15603 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15604 : "=w"(result)
15605 : "w"(a), "w"(b)
15606 : /* No clobbers */);
15607 return result;
15610 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15611 vzip1q_s16 (int16x8_t a, int16x8_t b)
15613 int16x8_t result;
15614 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15615 : "=w"(result)
15616 : "w"(a), "w"(b)
15617 : /* No clobbers */);
15618 return result;
15621 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15622 vzip1q_s32 (int32x4_t a, int32x4_t b)
15624 int32x4_t result;
15625 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15626 : "=w"(result)
15627 : "w"(a), "w"(b)
15628 : /* No clobbers */);
15629 return result;
15632 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15633 vzip1q_s64 (int64x2_t a, int64x2_t b)
15635 int64x2_t result;
15636 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15637 : "=w"(result)
15638 : "w"(a), "w"(b)
15639 : /* No clobbers */);
15640 return result;
15643 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15644 vzip1q_u8 (uint8x16_t a, uint8x16_t b)
15646 uint8x16_t result;
15647 __asm__ ("zip1 %0.16b,%1.16b,%2.16b"
15648 : "=w"(result)
15649 : "w"(a), "w"(b)
15650 : /* No clobbers */);
15651 return result;
15654 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15655 vzip1q_u16 (uint16x8_t a, uint16x8_t b)
15657 uint16x8_t result;
15658 __asm__ ("zip1 %0.8h,%1.8h,%2.8h"
15659 : "=w"(result)
15660 : "w"(a), "w"(b)
15661 : /* No clobbers */);
15662 return result;
15665 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15666 vzip1q_u32 (uint32x4_t a, uint32x4_t b)
15668 uint32x4_t result;
15669 __asm__ ("zip1 %0.4s,%1.4s,%2.4s"
15670 : "=w"(result)
15671 : "w"(a), "w"(b)
15672 : /* No clobbers */);
15673 return result;
15676 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15677 vzip1q_u64 (uint64x2_t a, uint64x2_t b)
15679 uint64x2_t result;
15680 __asm__ ("zip1 %0.2d,%1.2d,%2.2d"
15681 : "=w"(result)
15682 : "w"(a), "w"(b)
15683 : /* No clobbers */);
15684 return result;
15687 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15688 vzip2_f32 (float32x2_t a, float32x2_t b)
15690 float32x2_t result;
15691 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15692 : "=w"(result)
15693 : "w"(a), "w"(b)
15694 : /* No clobbers */);
15695 return result;
15698 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15699 vzip2_p8 (poly8x8_t a, poly8x8_t b)
15701 poly8x8_t result;
15702 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15703 : "=w"(result)
15704 : "w"(a), "w"(b)
15705 : /* No clobbers */);
15706 return result;
15709 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15710 vzip2_p16 (poly16x4_t a, poly16x4_t b)
15712 poly16x4_t result;
15713 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15714 : "=w"(result)
15715 : "w"(a), "w"(b)
15716 : /* No clobbers */);
15717 return result;
15720 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15721 vzip2_s8 (int8x8_t a, int8x8_t b)
15723 int8x8_t result;
15724 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15725 : "=w"(result)
15726 : "w"(a), "w"(b)
15727 : /* No clobbers */);
15728 return result;
15731 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15732 vzip2_s16 (int16x4_t a, int16x4_t b)
15734 int16x4_t result;
15735 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15736 : "=w"(result)
15737 : "w"(a), "w"(b)
15738 : /* No clobbers */);
15739 return result;
15742 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15743 vzip2_s32 (int32x2_t a, int32x2_t b)
15745 int32x2_t result;
15746 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15747 : "=w"(result)
15748 : "w"(a), "w"(b)
15749 : /* No clobbers */);
15750 return result;
15753 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15754 vzip2_u8 (uint8x8_t a, uint8x8_t b)
15756 uint8x8_t result;
15757 __asm__ ("zip2 %0.8b,%1.8b,%2.8b"
15758 : "=w"(result)
15759 : "w"(a), "w"(b)
15760 : /* No clobbers */);
15761 return result;
15764 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15765 vzip2_u16 (uint16x4_t a, uint16x4_t b)
15767 uint16x4_t result;
15768 __asm__ ("zip2 %0.4h,%1.4h,%2.4h"
15769 : "=w"(result)
15770 : "w"(a), "w"(b)
15771 : /* No clobbers */);
15772 return result;
15775 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15776 vzip2_u32 (uint32x2_t a, uint32x2_t b)
15778 uint32x2_t result;
15779 __asm__ ("zip2 %0.2s,%1.2s,%2.2s"
15780 : "=w"(result)
15781 : "w"(a), "w"(b)
15782 : /* No clobbers */);
15783 return result;
15786 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15787 vzip2q_f32 (float32x4_t a, float32x4_t b)
15789 float32x4_t result;
15790 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15791 : "=w"(result)
15792 : "w"(a), "w"(b)
15793 : /* No clobbers */);
15794 return result;
15797 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15798 vzip2q_f64 (float64x2_t a, float64x2_t b)
15800 float64x2_t result;
15801 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15802 : "=w"(result)
15803 : "w"(a), "w"(b)
15804 : /* No clobbers */);
15805 return result;
15808 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15809 vzip2q_p8 (poly8x16_t a, poly8x16_t b)
15811 poly8x16_t result;
15812 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15813 : "=w"(result)
15814 : "w"(a), "w"(b)
15815 : /* No clobbers */);
15816 return result;
15819 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15820 vzip2q_p16 (poly16x8_t a, poly16x8_t b)
15822 poly16x8_t result;
15823 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15824 : "=w"(result)
15825 : "w"(a), "w"(b)
15826 : /* No clobbers */);
15827 return result;
15830 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15831 vzip2q_s8 (int8x16_t a, int8x16_t b)
15833 int8x16_t result;
15834 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15835 : "=w"(result)
15836 : "w"(a), "w"(b)
15837 : /* No clobbers */);
15838 return result;
15841 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15842 vzip2q_s16 (int16x8_t a, int16x8_t b)
15844 int16x8_t result;
15845 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15846 : "=w"(result)
15847 : "w"(a), "w"(b)
15848 : /* No clobbers */);
15849 return result;
15852 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15853 vzip2q_s32 (int32x4_t a, int32x4_t b)
15855 int32x4_t result;
15856 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15857 : "=w"(result)
15858 : "w"(a), "w"(b)
15859 : /* No clobbers */);
15860 return result;
15863 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15864 vzip2q_s64 (int64x2_t a, int64x2_t b)
15866 int64x2_t result;
15867 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15868 : "=w"(result)
15869 : "w"(a), "w"(b)
15870 : /* No clobbers */);
15871 return result;
15874 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15875 vzip2q_u8 (uint8x16_t a, uint8x16_t b)
15877 uint8x16_t result;
15878 __asm__ ("zip2 %0.16b,%1.16b,%2.16b"
15879 : "=w"(result)
15880 : "w"(a), "w"(b)
15881 : /* No clobbers */);
15882 return result;
15885 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15886 vzip2q_u16 (uint16x8_t a, uint16x8_t b)
15888 uint16x8_t result;
15889 __asm__ ("zip2 %0.8h,%1.8h,%2.8h"
15890 : "=w"(result)
15891 : "w"(a), "w"(b)
15892 : /* No clobbers */);
15893 return result;
15896 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15897 vzip2q_u32 (uint32x4_t a, uint32x4_t b)
15899 uint32x4_t result;
15900 __asm__ ("zip2 %0.4s,%1.4s,%2.4s"
15901 : "=w"(result)
15902 : "w"(a), "w"(b)
15903 : /* No clobbers */);
15904 return result;
15907 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15908 vzip2q_u64 (uint64x2_t a, uint64x2_t b)
15910 uint64x2_t result;
15911 __asm__ ("zip2 %0.2d,%1.2d,%2.2d"
15912 : "=w"(result)
15913 : "w"(a), "w"(b)
15914 : /* No clobbers */);
15915 return result;
15918 /* End of temporary inline asm implementations. */
15920 /* Start of temporary inline asm for vldn, vstn and friends. */
15922 /* Create struct element types for duplicating loads.
15924 Create 2 element structures of:
15926 +------+----+----+----+----+
15927 | | 8 | 16 | 32 | 64 |
15928 +------+----+----+----+----+
15929 |int | Y | Y | N | N |
15930 +------+----+----+----+----+
15931 |uint | Y | Y | N | N |
15932 +------+----+----+----+----+
15933 |float | - | - | N | N |
15934 +------+----+----+----+----+
15935 |poly | Y | Y | - | - |
15936 +------+----+----+----+----+
15938 Create 3 element structures of:
15940 +------+----+----+----+----+
15941 | | 8 | 16 | 32 | 64 |
15942 +------+----+----+----+----+
15943 |int | Y | Y | Y | Y |
15944 +------+----+----+----+----+
15945 |uint | Y | Y | Y | Y |
15946 +------+----+----+----+----+
15947 |float | - | - | Y | Y |
15948 +------+----+----+----+----+
15949 |poly | Y | Y | - | - |
15950 +------+----+----+----+----+
15952 Create 4 element structures of:
15954 +------+----+----+----+----+
15955 | | 8 | 16 | 32 | 64 |
15956 +------+----+----+----+----+
15957 |int | Y | N | N | Y |
15958 +------+----+----+----+----+
15959 |uint | Y | N | N | Y |
15960 +------+----+----+----+----+
15961 |float | - | - | N | Y |
15962 +------+----+----+----+----+
15963 |poly | Y | N | - | - |
15964 +------+----+----+----+----+
15966 This is required for casting memory reference. */
15967 #define __STRUCTN(t, sz, nelem) \
15968 typedef struct t ## sz ## x ## nelem ## _t { \
15969 t ## sz ## _t val[nelem]; \
15970 } t ## sz ## x ## nelem ## _t;
15972 /* 2-element structs. */
15973 __STRUCTN (int, 8, 2)
15974 __STRUCTN (int, 16, 2)
15975 __STRUCTN (uint, 8, 2)
15976 __STRUCTN (uint, 16, 2)
15977 __STRUCTN (poly, 8, 2)
15978 __STRUCTN (poly, 16, 2)
15979 /* 3-element structs. */
15980 __STRUCTN (int, 8, 3)
15981 __STRUCTN (int, 16, 3)
15982 __STRUCTN (int, 32, 3)
15983 __STRUCTN (int, 64, 3)
15984 __STRUCTN (uint, 8, 3)
15985 __STRUCTN (uint, 16, 3)
15986 __STRUCTN (uint, 32, 3)
15987 __STRUCTN (uint, 64, 3)
15988 __STRUCTN (float, 32, 3)
15989 __STRUCTN (float, 64, 3)
15990 __STRUCTN (poly, 8, 3)
15991 __STRUCTN (poly, 16, 3)
15992 /* 4-element structs. */
15993 __STRUCTN (int, 8, 4)
15994 __STRUCTN (int, 64, 4)
15995 __STRUCTN (uint, 8, 4)
15996 __STRUCTN (uint, 64, 4)
15997 __STRUCTN (poly, 8, 4)
15998 __STRUCTN (float, 64, 4)
15999 #undef __STRUCTN
16001 #define __LD2R_FUNC(rettype, structtype, ptrtype, \
16002 regsuffix, funcsuffix, Q) \
16003 __extension__ static __inline rettype \
16004 __attribute__ ((__always_inline__)) \
16005 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
16007 rettype result; \
16008 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
16009 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
16010 : "=Q"(result) \
16011 : "Q"(*(const structtype *)ptr) \
16012 : "memory", "v16", "v17"); \
16013 return result; \
16016 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
16017 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
16018 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
16019 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
16020 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
16021 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
16022 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
16023 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
16024 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
16025 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
16026 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
16027 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
16028 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
16029 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
16030 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
16031 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
16032 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
16033 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
16034 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
16035 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
16036 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
16037 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
16038 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
16039 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
/* __LD2_LANE_FUNC: define vld2<Q>_lane_<funcsuffix> — replace lane c of
   each vector in pair b with the two elements at *ptr, leaving the other
   lanes unchanged.  The pair is loaded into v16/v17 with LD1, the lane
   overwritten with LD2 (lane index c is an "i" immediate operand), and
   the pair stored back out with ST1; v16/v17 are clobbered.
   NOTE(review): operand %2 dereferences ptr as the full rettype struct,
   reading more bytes than the two elements actually consumed — flagged
   but left as-is here.  */
#define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix,                    \
                        lnsuffix, funcsuffix, Q)                        \
__extension__ static __inline rettype                                   \
__attribute__ ((__always_inline__))                                     \
vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr,                  \
                                   rettype b, const int c)              \
{                                                                       \
  rettype result;                                                       \
  __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t"       \
           "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t"     \
           "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t"       \
           : "=Q"(result)                                               \
           : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c)                 \
           : "memory", "v16", "v17");                                   \
  return result;                                                        \
}
16058 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
16059 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
16060 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
16061 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
16062 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
16063 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
16064 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
16065 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
16066 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
16067 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
16068 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
16069 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
16070 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
16071 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
16072 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
16073 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
16074 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
16075 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
16076 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
16077 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
16078 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
16079 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
16080 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
16081 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* Define vld3[q]_dup_FUNCSUFFIX: load a 3-element structure from PTR
   and replicate each element across all lanes of its vector (LD3R),
   storing the triple of vectors out through the "Q" output.  */
#define __LD3R_FUNC(rettype, structtype, ptrtype, \
		    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
  { \
    rettype result; \
    __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(*(const structtype *)ptr) \
	     : "memory", "v16", "v17", "v18"); \
    return result; \
  }
16098 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
16099 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
16100 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
16101 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
16102 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
16103 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
16104 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
16105 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
16106 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
16107 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
16108 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
16109 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
16110 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
16111 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
16112 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
16113 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
16114 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
16115 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
16116 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
16117 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
16118 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
16119 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
16120 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
16121 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
/* Define vld3[q]_lane_FUNCSUFFIX: overwrite lane C of an existing
   triple of vectors with a 3-element structure loaded from PTR.  */
#define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     rettype b, const int c) \
  { \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
	     "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
	     : "memory", "v16", "v17", "v18"); \
    return result; \
  }
16140 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
16141 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
16142 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
16143 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
16144 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
16145 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
16146 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
16147 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
16148 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
16149 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
16150 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
16151 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
16152 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
16153 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
16154 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
16155 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
16156 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
16157 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
16158 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
16159 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
16160 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
16161 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
16162 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
16163 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Define vld4[q]_dup_FUNCSUFFIX: load a 4-element structure from PTR
   and replicate each element across all lanes of its vector (LD4R).  */
#define __LD4R_FUNC(rettype, structtype, ptrtype, \
		    regsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
  { \
    rettype result; \
    __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(*(const structtype *)ptr) \
	     : "memory", "v16", "v17", "v18", "v19"); \
    return result; \
  }
16180 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
16181 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
16182 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
16183 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
16184 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
16185 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
16186 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
16187 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
16188 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
16189 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
16190 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
16191 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
16192 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
16193 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
16194 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
16195 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
16196 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
16197 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
16198 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
16199 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
16200 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
16201 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
16202 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
16203 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
/* Define vld4[q]_lane_FUNCSUFFIX: overwrite lane C of an existing
   quadruple of vectors with a 4-element structure loaded from PTR.  */
#define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline rettype \
  __attribute__ ((__always_inline__)) \
  vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     rettype b, const int c) \
  { \
    rettype result; \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
	     "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
	     : "=Q"(result) \
	     : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
	     : "memory", "v16", "v17", "v18", "v19"); \
    return result; \
  }
16222 __LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
16223 __LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
16224 __LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
16225 __LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
16226 __LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
16227 __LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
16228 __LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
16229 __LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
16230 __LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
16231 __LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
16232 __LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
16233 __LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
16234 __LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
16235 __LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
16236 __LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
16237 __LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
16238 __LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
16239 __LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
16240 __LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
16241 __LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
16242 __LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
16243 __LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
16244 __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
16245 __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* Define vst2[q]_lane_FUNCSUFFIX: store lane C of the pair of vectors
   B to memory at PTR.
   NOTE(review): PTR is declared const yet the asm stores through it via
   the (intype *) cast, and ACLE declares vst2_lane with a non-const
   pointer -- confirm before changing the prototype, as existing callers
   may rely on the const-qualified signature.  */
#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
	     "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*(intype *) ptr) \
	     : "Q"(b), "i"(c) \
	     : "memory", "v16", "v17"); \
  }
16261 __ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,)
16262 __ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
16263 __ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
16264 __ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
16265 __ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
16266 __ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
16267 __ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
16268 __ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
16269 __ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
16270 __ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
16271 __ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
16272 __ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
16273 __ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
16274 __ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
16275 __ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
16276 __ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
16277 __ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
16278 __ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
16279 __ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
16280 __ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
16281 __ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
16282 __ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
16283 __ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
16284 __ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
/* Define vst3[q]_lane_FUNCSUFFIX: store lane C of the triple of
   vectors B to memory at PTR.
   NOTE(review): as with __ST2_LANE_FUNC, PTR is const-qualified but
   written through a cast -- confirm against ACLE before changing.  */
#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
	     "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*(intype *) ptr) \
	     : "Q"(b), "i"(c) \
	     : "memory", "v16", "v17", "v18"); \
  }
16300 __ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,)
16301 __ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
16302 __ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
16303 __ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
16304 __ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
16305 __ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
16306 __ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
16307 __ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
16308 __ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
16309 __ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
16310 __ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
16311 __ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
16312 __ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
16313 __ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
16314 __ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
16315 __ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
16316 __ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
16317 __ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
16318 __ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
16319 __ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
16320 __ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
16321 __ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
16322 __ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
16323 __ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
/* Define vst4[q]_lane_FUNCSUFFIX: store lane C of the quadruple of
   vectors B to memory at PTR.
   NOTE(review): as with __ST2_LANE_FUNC, PTR is const-qualified but
   written through a cast -- confirm against ACLE before changing.  */
#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \
			lnsuffix, funcsuffix, Q) \
  __extension__ static __inline void \
  __attribute__ ((__always_inline__)) \
  vst4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
				     intype b, const int c) \
  { \
    __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
	     "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \
	     : "=Q"(*(intype *) ptr) \
	     : "Q"(b), "i"(c) \
	     : "memory", "v16", "v17", "v18", "v19"); \
  }
16339 __ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,)
16340 __ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
16341 __ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
16342 __ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
16343 __ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
16344 __ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
16345 __ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
16346 __ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
16347 __ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
16348 __ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
16349 __ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
16350 __ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
16351 __ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
16352 __ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
16353 __ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
16354 __ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
16355 __ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
16356 __ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
16357 __ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
16358 __ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
16359 __ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
16360 __ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
16361 __ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
16362 __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
16364 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16365 vaddlv_s32 (int32x2_t a)
16367 int64_t result;
16368 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
16369 return result;
16372 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16373 vaddlv_u32 (uint32x2_t a)
16375 uint64_t result;
16376 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
16377 return result;
16380 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16381 vpaddd_s64 (int64x2_t __a)
16383 return __builtin_aarch64_addpdi (__a);
16386 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16387 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
16389 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
16392 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16393 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
16395 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
16398 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16399 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
16401 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
16404 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16405 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
16407 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
16410 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16411 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
16413 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
16416 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16417 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
16419 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
16422 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16423 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
16425 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
16428 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16429 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
16431 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
16434 /* Table intrinsics. */
16436 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16437 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
16439 poly8x8_t result;
16440 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16441 : "=w"(result)
16442 : "w"(a), "w"(b)
16443 : /* No clobbers */);
16444 return result;
16447 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16448 vqtbl1_s8 (int8x16_t a, int8x8_t b)
16450 int8x8_t result;
16451 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16452 : "=w"(result)
16453 : "w"(a), "w"(b)
16454 : /* No clobbers */);
16455 return result;
16458 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16459 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
16461 uint8x8_t result;
16462 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
16463 : "=w"(result)
16464 : "w"(a), "w"(b)
16465 : /* No clobbers */);
16466 return result;
16469 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16470 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
16472 poly8x16_t result;
16473 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16474 : "=w"(result)
16475 : "w"(a), "w"(b)
16476 : /* No clobbers */);
16477 return result;
16480 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16481 vqtbl1q_s8 (int8x16_t a, int8x16_t b)
16483 int8x16_t result;
16484 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16485 : "=w"(result)
16486 : "w"(a), "w"(b)
16487 : /* No clobbers */);
16488 return result;
16491 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16492 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
16494 uint8x16_t result;
16495 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
16496 : "=w"(result)
16497 : "w"(a), "w"(b)
16498 : /* No clobbers */);
16499 return result;
16502 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16503 vqtbl2_s8 (int8x16x2_t tab, int8x8_t idx)
16505 int8x8_t result;
16506 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16507 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16508 :"=w"(result)
16509 :"Q"(tab),"w"(idx)
16510 :"memory", "v16", "v17");
16511 return result;
16514 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16515 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
16517 uint8x8_t result;
16518 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16519 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16520 :"=w"(result)
16521 :"Q"(tab),"w"(idx)
16522 :"memory", "v16", "v17");
16523 return result;
16526 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16527 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
16529 poly8x8_t result;
16530 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16531 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16532 :"=w"(result)
16533 :"Q"(tab),"w"(idx)
16534 :"memory", "v16", "v17");
16535 return result;
16538 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16539 vqtbl2q_s8 (int8x16x2_t tab, int8x16_t idx)
16541 int8x16_t result;
16542 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16543 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16544 :"=w"(result)
16545 :"Q"(tab),"w"(idx)
16546 :"memory", "v16", "v17");
16547 return result;
16550 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16551 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
16553 uint8x16_t result;
16554 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16555 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16556 :"=w"(result)
16557 :"Q"(tab),"w"(idx)
16558 :"memory", "v16", "v17");
16559 return result;
16562 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16563 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
16565 poly8x16_t result;
16566 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16567 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16568 :"=w"(result)
16569 :"Q"(tab),"w"(idx)
16570 :"memory", "v16", "v17");
16571 return result;
16574 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16575 vqtbl3_s8 (int8x16x3_t tab, int8x8_t idx)
16577 int8x8_t result;
16578 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16579 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16580 :"=w"(result)
16581 :"Q"(tab),"w"(idx)
16582 :"memory", "v16", "v17", "v18");
16583 return result;
16586 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16587 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
16589 uint8x8_t result;
16590 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16591 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16592 :"=w"(result)
16593 :"Q"(tab),"w"(idx)
16594 :"memory", "v16", "v17", "v18");
16595 return result;
16598 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16599 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
16601 poly8x8_t result;
16602 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16603 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
16604 :"=w"(result)
16605 :"Q"(tab),"w"(idx)
16606 :"memory", "v16", "v17", "v18");
16607 return result;
16610 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16611 vqtbl3q_s8 (int8x16x3_t tab, int8x16_t idx)
16613 int8x16_t result;
16614 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16615 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16616 :"=w"(result)
16617 :"Q"(tab),"w"(idx)
16618 :"memory", "v16", "v17", "v18");
16619 return result;
16622 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16623 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
16625 uint8x16_t result;
16626 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16627 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16628 :"=w"(result)
16629 :"Q"(tab),"w"(idx)
16630 :"memory", "v16", "v17", "v18");
16631 return result;
16634 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16635 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
16637 poly8x16_t result;
16638 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
16639 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
16640 :"=w"(result)
16641 :"Q"(tab),"w"(idx)
16642 :"memory", "v16", "v17", "v18");
16643 return result;
16646 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16647 vqtbl4_s8 (int8x16x4_t tab, int8x8_t idx)
16649 int8x8_t result;
16650 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16651 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16652 :"=w"(result)
16653 :"Q"(tab),"w"(idx)
16654 :"memory", "v16", "v17", "v18", "v19");
16655 return result;
16658 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16659 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
16661 uint8x8_t result;
16662 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16663 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16664 :"=w"(result)
16665 :"Q"(tab),"w"(idx)
16666 :"memory", "v16", "v17", "v18", "v19");
16667 return result;
16670 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16671 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
16673 poly8x8_t result;
16674 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16675 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
16676 :"=w"(result)
16677 :"Q"(tab),"w"(idx)
16678 :"memory", "v16", "v17", "v18", "v19");
16679 return result;
16683 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16684 vqtbl4q_s8 (int8x16x4_t tab, int8x16_t idx)
16686 int8x16_t result;
16687 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16688 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16689 :"=w"(result)
16690 :"Q"(tab),"w"(idx)
16691 :"memory", "v16", "v17", "v18", "v19");
16692 return result;
16695 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16696 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
16698 uint8x16_t result;
16699 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16700 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16701 :"=w"(result)
16702 :"Q"(tab),"w"(idx)
16703 :"memory", "v16", "v17", "v18", "v19");
16704 return result;
16707 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16708 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
16710 poly8x16_t result;
16711 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
16712 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
16713 :"=w"(result)
16714 :"Q"(tab),"w"(idx)
16715 :"memory", "v16", "v17", "v18", "v19");
16716 return result;
16720 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16721 vqtbx1_s8 (int8x8_t r, int8x16_t tab, int8x8_t idx)
16723 int8x8_t result = r;
16724 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16725 : "+w"(result)
16726 : "w"(tab), "w"(idx)
16727 : /* No clobbers */);
16728 return result;
16731 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16732 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
16734 uint8x8_t result = r;
16735 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16736 : "+w"(result)
16737 : "w"(tab), "w"(idx)
16738 : /* No clobbers */);
16739 return result;
16742 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16743 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
16745 poly8x8_t result = r;
16746 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
16747 : "+w"(result)
16748 : "w"(tab), "w"(idx)
16749 : /* No clobbers */);
16750 return result;
16753 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16754 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, int8x16_t idx)
16756 int8x16_t result = r;
16757 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16758 : "+w"(result)
16759 : "w"(tab), "w"(idx)
16760 : /* No clobbers */);
16761 return result;
16764 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16765 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
16767 uint8x16_t result = r;
16768 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16769 : "+w"(result)
16770 : "w"(tab), "w"(idx)
16771 : /* No clobbers */);
16772 return result;
16775 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16776 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
16778 poly8x16_t result = r;
16779 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
16780 : "+w"(result)
16781 : "w"(tab), "w"(idx)
16782 : /* No clobbers */);
16783 return result;
16786 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16787 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, int8x8_t idx)
16789 int8x8_t result = r;
16790 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16791 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16792 :"+w"(result)
16793 :"Q"(tab),"w"(idx)
16794 :"memory", "v16", "v17");
16795 return result;
16798 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16799 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
16801 uint8x8_t result = r;
16802 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16803 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16804 :"+w"(result)
16805 :"Q"(tab),"w"(idx)
16806 :"memory", "v16", "v17");
16807 return result;
16810 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16811 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
16813 poly8x8_t result = r;
16814 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16815 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
16816 :"+w"(result)
16817 :"Q"(tab),"w"(idx)
16818 :"memory", "v16", "v17");
16819 return result;
16823 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16824 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, int8x16_t idx)
16826 int8x16_t result = r;
16827 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16828 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16829 :"+w"(result)
16830 :"Q"(tab),"w"(idx)
16831 :"memory", "v16", "v17");
16832 return result;
16835 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16836 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
16838 uint8x16_t result = r;
16839 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16840 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16841 :"+w"(result)
16842 :"Q"(tab),"w"(idx)
16843 :"memory", "v16", "v17");
16844 return result;
16847 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16848 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
16850 poly8x16_t result = r;
16851 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
16852 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
16853 :"+w"(result)
16854 :"Q"(tab),"w"(idx)
16855 :"memory", "v16", "v17");
16856 return result;
/* vqtbx3 family: extended table lookup (TBX) into a 48-byte,
   three-vector table.  Each variant loads TAB into the fixed registers
   v16-v18, then TBX selects bytes by IDX; RESULT is initialized from R,
   so lanes TBX does not write retain R's value.  The "memory" clobber
   covers the ld1 read of TAB through the "Q" memory operand.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
           "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
           "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
           "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18");
  return result;
}

/* Quad-register (128-bit destination) variants of the above.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, int8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18");
  return result;
}
/* vqtbx4 family: extended table lookup (TBX) into a 64-byte,
   four-vector table.  TAB is loaded into the fixed registers v16-v19
   and TBX selects bytes by IDX; RESULT is initialized from R, so lanes
   TBX does not write keep R's value.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}

/* Quad-register (128-bit destination) variants of the above.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, int8x16_t idx)
{
  int8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
{
  uint8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
{
  poly8x16_t result = r;
  __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
           "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
           :"+w"(result)
           :"Q"(tab),"w"(idx)
           :"memory", "v16", "v17", "v18", "v19");
  return result;
}
17007 /* V7 legacy table intrinsics. */
/* vtbl1 family: V7-style one-register (8-byte) table lookup.  The
   64-bit table is zero-extended to 128 bits with vcombine so the
   AArch64 TBL (which reads .16b tables) sees zeros in bytes 8-15;
   indices 8 and above therefore yield 0, as vtbl1 requires.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl1_s8 (int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
/* vtbl2 family: V7-style two-register (16-byte) table lookup.  The two
   64-bit halves are combined into one 128-bit register, so a single
   AArch64 TBL covers the full table; out-of-range indices (>= 16)
   produce 0 by TBL's own behaviour.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
           : "=w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
/* vtbl3 family: V7-style three-register (24-byte) table lookup.  The
   three 64-bit table vectors are packed into an int8x16x2_t (the last
   128-bit half zero-filled via vcreate), loaded into the fixed
   registers v16-v17, and looked up with a two-register TBL.  Indices
   24-31 hit the zero padding and indices >= 32 produce 0 from TBL, so
   all out-of-range lanes come back 0 as vtbl3 requires.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
/* vtbl4 family: V7-style four-register (32-byte) table lookup.  The
   four 64-bit table vectors are combined pairwise into two 128-bit
   registers (v16-v17) and looked up with a two-register TBL, which
   returns 0 for indices >= 32.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "=w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
/* vtbx1 family: V7-style TBX with an 8-byte table.  AArch64's TBX would
   leave lanes unchanged only for indices >= 16 (it sees a 16-byte
   table), so vtbx1's "keep R for idx >= 8" semantics are rebuilt by
   hand:
     movi %0.8b, 8        -- constant 8 = table length in bytes
     cmhs %0.8b, idx, %0  -- unsigned mask of out-of-range lanes
     tbl  %1, {temp}, idx -- raw lookup into the zero-extended table
     bsl  %0, r, %1       -- out-of-range lanes take R, others the lookup
   RESULT is bound "+w" only so it is allocated early; the movi writes
   it before any read.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x8_t tmp1;
  int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("movi %0.8b, 8\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {%2.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "w"(temp), "w"(idx), "w"(r)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x8_t tmp1;
  uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("movi %0.8b, 8\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {%2.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "w"(temp), "w"(idx), "w"(r)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x8_t tmp1;
  poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("movi %0.8b, 8\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {%2.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "w"(temp), "w"(idx), "w"(r)
           : /* No clobbers */);
  return result;
}
/* vtbx2 family: V7-style TBX with a 16-byte table.  Combining the two
   64-bit halves yields exactly the 16-byte table the AArch64 TBX
   expects, so a single TBX gives the required semantics directly:
   lanes with idx >= 16 keep their value from R.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
  __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
           : "+w"(result)
           : "w"(temp), "w"(idx)
           : /* No clobbers */);
  return result;
}
/* vtbx3 family: V7-style TBX with a 24-byte table.  The packed table
   (third vector zero-extended) is 32 bytes to AArch64's TBX, which
   would only preserve R for idx >= 32; vtbx3 must preserve R for
   idx >= 24.  So the mask trick from vtbx1 is used with constant 24:
   cmhs builds the out-of-range mask, TBL does the raw lookup through
   v16-v17, and BSL merges R into the out-of-range lanes.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
{
  int8x8_t result;
  int8x8_t tmp1;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
           "movi %0.8b, 24\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "Q"(temp), "w"(idx), "w"(r)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
{
  uint8x8_t result;
  uint8x8_t tmp1;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
           "movi %0.8b, 24\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "Q"(temp), "w"(idx), "w"(r)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
{
  poly8x8_t result;
  poly8x8_t tmp1;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (UINT64_C (0x0)));
  __asm__ ("ld1 {v16.16b - v17.16b}, %2\n\t"
           "movi %0.8b, 24\n\t"
           "cmhs %0.8b, %3.8b, %0.8b\n\t"
           "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
           "bsl %0.8b, %4.8b, %1.8b\n\t"
           : "+w"(result), "=w"(tmp1)
           : "Q"(temp), "w"(idx), "w"(r)
           : "v16", "v17", "memory");
  return result;
}
/* vtbx4 family: V7-style TBX with a 32-byte table.  The four 64-bit
   table vectors pack exactly into two 128-bit registers, matching
   AArch64's two-register TBX (which preserves R for idx >= 32), so no
   masking is needed.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
{
  int8x8_t result = r;
  int8x16x2_t temp;
  temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
{
  uint8x8_t result = r;
  uint8x16x2_t temp;
  temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
{
  poly8x8_t result = r;
  poly8x16x2_t temp;
  temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
  temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
  __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
           "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
           : "+w"(result)
           : "Q"(temp), "w"(idx)
           : "v16", "v17", "memory");
  return result;
}
17357 /* End of temporary inline asm. */
17359 /* Start of optimal implementations in approved order. */
17361 /* vabs */
/* vabs family: lane-wise absolute value.  Vector forms map straight to
   the abs builtin for the matching machine mode; the one-element f64
   and s64 forms fall back to generic builtins on the scalar typedefs.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vabs_f32 (float32x2_t __a)
{
  return __builtin_aarch64_absv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vabs_f64 (float64x1_t __a)
{
  return __builtin_fabs (__a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vabs_s8 (int8x8_t __a)
{
  return __builtin_aarch64_absv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vabs_s16 (int16x4_t __a)
{
  return __builtin_aarch64_absv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vabs_s32 (int32x2_t __a)
{
  return __builtin_aarch64_absv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vabs_s64 (int64x1_t __a)
{
  /* NOTE(review): __builtin_llabs (INT64_MIN) is undefined behaviour in
     C, whereas the hardware ABS would wrap — confirm this is acceptable
     for the intrinsic's contract.  */
  return __builtin_llabs (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vabsq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_absv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vabsq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_absv2df (__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vabsq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_absv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vabsq_s16 (int16x8_t __a)
{
  return __builtin_aarch64_absv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vabsq_s32 (int32x4_t __a)
{
  return __builtin_aarch64_absv4si (__a);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vabsq_s64 (int64x2_t __a)
{
  return __builtin_aarch64_absv2di (__a);
}
17435 /* vadd */
17437 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
17438 vaddd_s64 (int64x1_t __a, int64x1_t __b)
17440 return __a + __b;
17443 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17444 vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
17446 return __a + __b;
17449 /* vaddv */
17451 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17452 vaddv_s8 (int8x8_t __a)
17454 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
17457 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17458 vaddv_s16 (int16x4_t __a)
17460 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
17463 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17464 vaddv_s32 (int32x2_t __a)
17466 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
17469 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17470 vaddv_u8 (uint8x8_t __a)
17472 return vget_lane_u8 ((uint8x8_t)
17473 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
17476 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17477 vaddv_u16 (uint16x4_t __a)
17479 return vget_lane_u16 ((uint16x4_t)
17480 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
17483 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17484 vaddv_u32 (uint32x2_t __a)
17486 return vget_lane_u32 ((uint32x2_t)
17487 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
17490 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
17491 vaddvq_s8 (int8x16_t __a)
17493 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
17496 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
17497 vaddvq_s16 (int16x8_t __a)
17499 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
17502 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17503 vaddvq_s32 (int32x4_t __a)
17505 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
17508 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
17509 vaddvq_s64 (int64x2_t __a)
17511 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
17514 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
17515 vaddvq_u8 (uint8x16_t __a)
17517 return vgetq_lane_u8 ((uint8x16_t)
17518 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
17521 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
17522 vaddvq_u16 (uint16x8_t __a)
17524 return vgetq_lane_u16 ((uint16x8_t)
17525 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
17528 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17529 vaddvq_u32 (uint32x4_t __a)
17531 return vgetq_lane_u32 ((uint32x4_t)
17532 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
17535 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17536 vaddvq_u64 (uint64x2_t __a)
17538 return vgetq_lane_u64 ((uint64x2_t)
17539 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
17542 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17543 vaddv_f32 (float32x2_t __a)
17545 float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
17546 return vget_lane_f32 (t, 0);
17549 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
17550 vaddvq_f32 (float32x4_t __a)
17552 float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
17553 return vgetq_lane_f32 (t, 0);
17556 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
17557 vaddvq_f64 (float64x2_t __a)
17559 float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
17560 return vgetq_lane_f64 (t, 0);
17563 /* vcage */
/* vcage family: absolute compare greater-than-or-equal,
   |a| >= |b|.  Scalar forms return an all-ones mask (-1) on true and 0
   on false; vector forms compose vabs* with the vector >= operator,
   which yields per-lane all-ones/all-zeros masks.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcages_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcage_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) >= vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcageq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) >= vabsq_f32 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcaged_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcageq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) >= vabsq_f64 (__b);
}
17595 /* vcagt */
/* vcagt family: absolute compare greater-than, |a| > |b|.  Same mask
   conventions as vcage above.  */

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vcagts_f32 (float32_t __a, float32_t __b)
{
  return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcagt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) > vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcagtq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) > vabsq_f32 (__b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcagtd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcagtq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) > vabsq_f64 (__b);
}
17627 /* vcale */
/* vcale family: absolute compare less-than-or-equal, |a| <= |b|,
   per-lane all-ones/all-zeros mask result.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcale_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) <= vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaleq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) <= vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaleq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) <= vabsq_f64 (__b);
}
17647 /* vcalt */
/* vcalt family: absolute compare less-than, |a| < |b|, per-lane
   all-ones/all-zeros mask result.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcalt_f32 (float32x2_t __a, float32x2_t __b)
{
  return vabs_f32 (__a) < vabs_f32 (__b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcaltq_f32 (float32x4_t __a, float32x4_t __b)
{
  return vabsq_f32 (__a) < vabsq_f32 (__b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vcaltq_f64 (float64x2_t __a, float64x2_t __b)
{
  return vabsq_f64 (__a) < vabsq_f64 (__b);
}
17667 /* vceq - vector. */
/* vceq family (vector): lane-wise equality, producing an all-ones or
   all-zeros mask per lane.  Most forms call the CMEQ builtin for the
   matching mode; unsigned and poly inputs are cast to the signed
   element types the builtins are declared on (bit-pattern compare, so
   the casts are harmless).  The one-element 64-bit forms are plain C
   compares on the scalar typedefs.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_f32 (float32x2_t __a, float32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_f64 (float64x1_t __a, float64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_p8 (poly8x8_t __a, poly8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_s64 (int64x1_t __a, int64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceq_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceq_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceq_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceq_u64 (uint64x1_t __a, uint64x1_t __b)
{
  return __a == __b ? -1ll : 0ll;
}

/* Quad-register (128-bit) variants.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_f32 (float32x4_t __a, float32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_f64 (float64x2_t __a, float64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
17810 /* vceq - scalar. */
17812 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
17813 vceqs_f32 (float32_t __a, float32_t __b)
17815 return __a == __b ? -1 : 0;
17818 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17819 vceqd_s64 (int64x1_t __a, int64x1_t __b)
17821 return __a == __b ? -1ll : 0ll;
17824 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
17825 vceqd_u64 (uint64x1_t __a, uint64x1_t __b)
17827 return __a == __b ? -1ll : 0ll;
17830 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
17831 vceqd_f64 (float64_t __a, float64_t __b)
17833 return __a == __b ? -1ll : 0ll;
17836 /* vceqz - vector. */
/* vceqz family (vector): lane-wise compare against zero, producing an
   all-ones or all-zeros mask per lane.  Each wrapper materializes a
   zero vector __b and reuses the CMEQ builtin for the matching mode
   (with signed-type casts for unsigned/poly inputs); the one-element
   64-bit forms are plain C compares.  */

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_f32 (float32x2_t __a)
{
  float32x2_t __b = {0.0f, 0.0f};
  return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_f64 (float64x1_t __a)
{
  return __a == 0.0 ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_p8 (poly8x8_t __a)
{
  poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_s8 (int8x8_t __a)
{
  int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_s16 (int16x4_t __a)
{
  int16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_s32 (int32x2_t __a)
{
  int32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_s64 (int64x1_t __a)
{
  return __a == 0ll ? -1ll : 0ll;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vceqz_u8 (uint8x8_t __a)
{
  uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
						 (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vceqz_u16 (uint16x4_t __a)
{
  uint16x4_t __b = {0, 0, 0, 0};
  return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
						  (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vceqz_u32 (uint32x2_t __a)
{
  uint32x2_t __b = {0, 0};
  return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
						  (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vceqz_u64 (uint64x1_t __a)
{
  return __a == 0ll ? -1ll : 0ll;
}

/* Quad-register (128-bit) variants.  */

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_f32 (float32x4_t __a)
{
  float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
  return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_f64 (float64x2_t __a)
{
  float64x2_t __b = {0.0, 0.0};
  return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_p8 (poly8x16_t __a)
{
  poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_s8 (int8x16_t __a)
{
  int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		   0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqzq_s16 (int16x8_t __a)
{
  int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_s32 (int32x4_t __a)
{
  int32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_s64 (int64x2_t __a)
{
  int64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vceqzq_u8 (uint8x16_t __a)
{
  uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0};
  return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
						   (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vceqzq_u16 (uint16x8_t __a)
{
  uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
  return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
						  (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vceqzq_u32 (uint32x4_t __a)
{
  uint32x4_t __b = {0, 0, 0, 0};
  return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
						  (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vceqzq_u64 (uint64x2_t __a)
{
  uint64x2_t __b = {0, 0};
  return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
						  (int64x2_t) __b);
}
18001 /* vceqz - scalar. */
18003 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18004 vceqzs_f32 (float32_t __a)
18006 return __a == 0.0f ? -1 : 0;
18009 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18010 vceqzd_s64 (int64x1_t __a)
18012 return __a == 0 ? -1ll : 0ll;
18015 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18016 vceqzd_u64 (int64x1_t __a)
18018 return __a == 0 ? -1ll : 0ll;
18021 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18022 vceqzd_f64 (float64_t __a)
18024 return __a == 0.0 ? -1ll : 0ll;
18027 /* vcge - vector. */
18029 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18030 vcge_f32 (float32x2_t __a, float32x2_t __b)
18032 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
18035 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18036 vcge_f64 (float64x1_t __a, float64x1_t __b)
18038 return __a >= __b ? -1ll : 0ll;
18041 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18042 vcge_p8 (poly8x8_t __a, poly8x8_t __b)
18044 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
18045 (int8x8_t) __b);
18048 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18049 vcge_s8 (int8x8_t __a, int8x8_t __b)
18051 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
18054 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18055 vcge_s16 (int16x4_t __a, int16x4_t __b)
18057 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
18060 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18061 vcge_s32 (int32x2_t __a, int32x2_t __b)
18063 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
18066 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18067 vcge_s64 (int64x1_t __a, int64x1_t __b)
18069 return __a >= __b ? -1ll : 0ll;
18072 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18073 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
18075 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
18076 (int8x8_t) __b);
18079 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18080 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
18082 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
18083 (int16x4_t) __b);
18086 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18087 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
18089 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
18090 (int32x2_t) __b);
18093 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18094 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
18096 return __a >= __b ? -1ll : 0ll;
18099 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18100 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
18102 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
18105 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18106 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
18108 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
18111 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18112 vcgeq_p8 (poly8x16_t __a, poly8x16_t __b)
18114 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
18115 (int8x16_t) __b);
18118 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18119 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
18121 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
18124 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18125 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
18127 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
18130 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18131 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
18133 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
18136 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18137 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
18139 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
18142 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18143 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
18145 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
18146 (int8x16_t) __b);
18149 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18150 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
18152 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
18153 (int16x8_t) __b);
18156 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18157 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
18159 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
18160 (int32x4_t) __b);
18163 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18164 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
18166 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
18167 (int64x2_t) __b);
18170 /* vcge - scalar. */
18172 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18173 vcges_f32 (float32_t __a, float32_t __b)
18175 return __a >= __b ? -1 : 0;
18178 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18179 vcged_s64 (int64x1_t __a, int64x1_t __b)
18181 return __a >= __b ? -1ll : 0ll;
18184 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18185 vcged_u64 (uint64x1_t __a, uint64x1_t __b)
18187 return __a >= __b ? -1ll : 0ll;
18190 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18191 vcged_f64 (float64_t __a, float64_t __b)
18193 return __a >= __b ? -1ll : 0ll;
18196 /* vcgez - vector. */
18198 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18199 vcgez_f32 (float32x2_t __a)
18201 float32x2_t __b = {0.0f, 0.0f};
18202 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
18205 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18206 vcgez_f64 (float64x1_t __a)
18208 return __a >= 0.0 ? -1ll : 0ll;
18211 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18212 vcgez_p8 (poly8x8_t __a)
18214 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18215 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __a,
18216 (int8x8_t) __b);
18219 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18220 vcgez_s8 (int8x8_t __a)
18222 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18223 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
18226 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18227 vcgez_s16 (int16x4_t __a)
18229 int16x4_t __b = {0, 0, 0, 0};
18230 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
18233 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18234 vcgez_s32 (int32x2_t __a)
18236 int32x2_t __b = {0, 0};
18237 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
18240 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18241 vcgez_s64 (int64x1_t __a)
18243 return __a >= 0ll ? -1ll : 0ll;
18246 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18247 vcgez_u8 (uint8x8_t __a)
18249 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18250 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
18251 (int8x8_t) __b);
18254 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18255 vcgez_u16 (uint16x4_t __a)
18257 uint16x4_t __b = {0, 0, 0, 0};
18258 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
18259 (int16x4_t) __b);
18262 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18263 vcgez_u32 (uint32x2_t __a)
18265 uint32x2_t __b = {0, 0};
18266 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
18267 (int32x2_t) __b);
18270 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18271 vcgez_u64 (uint64x1_t __a)
18273 return __a >= 0ll ? -1ll : 0ll;
18276 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18277 vcgezq_f32 (float32x4_t __a)
18279 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
18280 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
18283 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18284 vcgezq_f64 (float64x2_t __a)
18286 float64x2_t __b = {0.0, 0.0};
18287 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
18290 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18291 vcgezq_p8 (poly8x16_t __a)
18293 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18294 0, 0, 0, 0, 0, 0, 0, 0};
18295 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __a,
18296 (int8x16_t) __b);
18299 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18300 vcgezq_s8 (int8x16_t __a)
18302 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18303 0, 0, 0, 0, 0, 0, 0, 0};
18304 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
18307 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18308 vcgezq_s16 (int16x8_t __a)
18310 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18311 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
18314 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18315 vcgezq_s32 (int32x4_t __a)
18317 int32x4_t __b = {0, 0, 0, 0};
18318 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
18321 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18322 vcgezq_s64 (int64x2_t __a)
18324 int64x2_t __b = {0, 0};
18325 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
18328 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18329 vcgezq_u8 (uint8x16_t __a)
18331 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18332 0, 0, 0, 0, 0, 0, 0, 0};
18333 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
18334 (int8x16_t) __b);
18337 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18338 vcgezq_u16 (uint16x8_t __a)
18340 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18341 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
18342 (int16x8_t) __b);
18345 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18346 vcgezq_u32 (uint32x4_t __a)
18348 uint32x4_t __b = {0, 0, 0, 0};
18349 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
18350 (int32x4_t) __b);
18353 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18354 vcgezq_u64 (uint64x2_t __a)
18356 uint64x2_t __b = {0, 0};
18357 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
18358 (int64x2_t) __b);
18361 /* vcgez - scalar. */
18363 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18364 vcgezs_f32 (float32_t __a)
18366 return __a >= 0.0f ? -1 : 0;
18369 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18370 vcgezd_s64 (int64x1_t __a)
18372 return __a >= 0 ? -1ll : 0ll;
18375 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18376 vcgezd_u64 (int64x1_t __a)
18378 return __a >= 0 ? -1ll : 0ll;
18381 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18382 vcgezd_f64 (float64_t __a)
18384 return __a >= 0.0 ? -1ll : 0ll;
18387 /* vcgt - vector. */
18389 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18390 vcgt_f32 (float32x2_t __a, float32x2_t __b)
18392 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
18395 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18396 vcgt_f64 (float64x1_t __a, float64x1_t __b)
18398 return __a > __b ? -1ll : 0ll;
18401 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18402 vcgt_p8 (poly8x8_t __a, poly8x8_t __b)
18404 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
18405 (int8x8_t) __b);
18408 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18409 vcgt_s8 (int8x8_t __a, int8x8_t __b)
18411 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
18414 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18415 vcgt_s16 (int16x4_t __a, int16x4_t __b)
18417 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
18420 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18421 vcgt_s32 (int32x2_t __a, int32x2_t __b)
18423 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
18426 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18427 vcgt_s64 (int64x1_t __a, int64x1_t __b)
18429 return __a > __b ? -1ll : 0ll;
18432 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18433 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
18435 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
18436 (int8x8_t) __b);
18439 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18440 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
18442 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
18443 (int16x4_t) __b);
18446 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18447 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
18449 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
18450 (int32x2_t) __b);
18453 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18454 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
18456 return __a > __b ? -1ll : 0ll;
18459 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18460 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
18462 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
18465 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18466 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
18468 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
18471 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18472 vcgtq_p8 (poly8x16_t __a, poly8x16_t __b)
18474 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
18475 (int8x16_t) __b);
18478 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18479 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
18481 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
18484 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18485 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
18487 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
18490 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18491 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
18493 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
18496 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18497 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
18499 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
18502 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18503 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
18505 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
18506 (int8x16_t) __b);
18509 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18510 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
18512 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
18513 (int16x8_t) __b);
18516 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18517 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
18519 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
18520 (int32x4_t) __b);
18523 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18524 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
18526 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
18527 (int64x2_t) __b);
18530 /* vcgt - scalar. */
18532 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18533 vcgts_f32 (float32_t __a, float32_t __b)
18535 return __a > __b ? -1 : 0;
18538 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18539 vcgtd_s64 (int64x1_t __a, int64x1_t __b)
18541 return __a > __b ? -1ll : 0ll;
18544 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18545 vcgtd_u64 (uint64x1_t __a, uint64x1_t __b)
18547 return __a > __b ? -1ll : 0ll;
18550 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18551 vcgtd_f64 (float64_t __a, float64_t __b)
18553 return __a > __b ? -1ll : 0ll;
18556 /* vcgtz - vector. */
18558 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18559 vcgtz_f32 (float32x2_t __a)
18561 float32x2_t __b = {0.0f, 0.0f};
18562 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
18565 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18566 vcgtz_f64 (float64x1_t __a)
18568 return __a > 0.0 ? -1ll : 0ll;
18571 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18572 vcgtz_p8 (poly8x8_t __a)
18574 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18575 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __a,
18576 (int8x8_t) __b);
18579 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18580 vcgtz_s8 (int8x8_t __a)
18582 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18583 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
18586 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18587 vcgtz_s16 (int16x4_t __a)
18589 int16x4_t __b = {0, 0, 0, 0};
18590 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
18593 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18594 vcgtz_s32 (int32x2_t __a)
18596 int32x2_t __b = {0, 0};
18597 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
18600 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18601 vcgtz_s64 (int64x1_t __a)
18603 return __a > 0ll ? -1ll : 0ll;
18606 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18607 vcgtz_u8 (uint8x8_t __a)
18609 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18610 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
18611 (int8x8_t) __b);
18614 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18615 vcgtz_u16 (uint16x4_t __a)
18617 uint16x4_t __b = {0, 0, 0, 0};
18618 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
18619 (int16x4_t) __b);
18622 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18623 vcgtz_u32 (uint32x2_t __a)
18625 uint32x2_t __b = {0, 0};
18626 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
18627 (int32x2_t) __b);
18630 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18631 vcgtz_u64 (uint64x1_t __a)
18633 return __a > 0ll ? -1ll : 0ll;
18636 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18637 vcgtzq_f32 (float32x4_t __a)
18639 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
18640 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
18643 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18644 vcgtzq_f64 (float64x2_t __a)
18646 float64x2_t __b = {0.0, 0.0};
18647 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
18650 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18651 vcgtzq_p8 (poly8x16_t __a)
18653 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18654 0, 0, 0, 0, 0, 0, 0, 0};
18655 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __a,
18656 (int8x16_t) __b);
18659 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18660 vcgtzq_s8 (int8x16_t __a)
18662 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18663 0, 0, 0, 0, 0, 0, 0, 0};
18664 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
18667 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18668 vcgtzq_s16 (int16x8_t __a)
18670 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18671 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
18674 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18675 vcgtzq_s32 (int32x4_t __a)
18677 int32x4_t __b = {0, 0, 0, 0};
18678 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
18681 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18682 vcgtzq_s64 (int64x2_t __a)
18684 int64x2_t __b = {0, 0};
18685 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
18688 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18689 vcgtzq_u8 (uint8x16_t __a)
18691 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18692 0, 0, 0, 0, 0, 0, 0, 0};
18693 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
18694 (int8x16_t) __b);
18697 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18698 vcgtzq_u16 (uint16x8_t __a)
18700 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18701 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
18702 (int16x8_t) __b);
18705 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18706 vcgtzq_u32 (uint32x4_t __a)
18708 uint32x4_t __b = {0, 0, 0, 0};
18709 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
18710 (int32x4_t) __b);
18713 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18714 vcgtzq_u64 (uint64x2_t __a)
18716 uint64x2_t __b = {0, 0};
18717 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
18718 (int64x2_t) __b);
18721 /* vcgtz - scalar. */
18723 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18724 vcgtzs_f32 (float32_t __a)
18726 return __a > 0.0f ? -1 : 0;
18729 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18730 vcgtzd_s64 (int64x1_t __a)
18732 return __a > 0 ? -1ll : 0ll;
18735 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18736 vcgtzd_u64 (int64x1_t __a)
18738 return __a > 0 ? -1ll : 0ll;
18741 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18742 vcgtzd_f64 (float64_t __a)
18744 return __a > 0.0 ? -1ll : 0ll;
18747 /* vcle - vector. */
18749 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18750 vcle_f32 (float32x2_t __a, float32x2_t __b)
18752 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
18755 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18756 vcle_f64 (float64x1_t __a, float64x1_t __b)
18758 return __a <= __b ? -1ll : 0ll;
18761 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18762 vcle_p8 (poly8x8_t __a, poly8x8_t __b)
18764 return (uint8x8_t) __builtin_aarch64_cmgev8qi ((int8x8_t) __b,
18765 (int8x8_t) __a);
18768 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18769 vcle_s8 (int8x8_t __a, int8x8_t __b)
18771 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
18774 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18775 vcle_s16 (int16x4_t __a, int16x4_t __b)
18777 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
18780 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18781 vcle_s32 (int32x2_t __a, int32x2_t __b)
18783 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
18786 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18787 vcle_s64 (int64x1_t __a, int64x1_t __b)
18789 return __a <= __b ? -1ll : 0ll;
18792 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18793 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
18795 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
18796 (int8x8_t) __a);
18799 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18800 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
18802 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
18803 (int16x4_t) __a);
18806 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18807 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
18809 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
18810 (int32x2_t) __a);
18813 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18814 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
18816 return __a <= __b ? -1ll : 0ll;
18819 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18820 vcleq_f32 (float32x4_t __a, float32x4_t __b)
18822 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
18825 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18826 vcleq_f64 (float64x2_t __a, float64x2_t __b)
18828 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
18831 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18832 vcleq_p8 (poly8x16_t __a, poly8x16_t __b)
18834 return (uint8x16_t) __builtin_aarch64_cmgev16qi ((int8x16_t) __b,
18835 (int8x16_t) __a);
18838 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18839 vcleq_s8 (int8x16_t __a, int8x16_t __b)
18841 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
18844 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18845 vcleq_s16 (int16x8_t __a, int16x8_t __b)
18847 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
18850 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18851 vcleq_s32 (int32x4_t __a, int32x4_t __b)
18853 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
18856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18857 vcleq_s64 (int64x2_t __a, int64x2_t __b)
18859 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
18862 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18863 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
18865 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
18866 (int8x16_t) __a);
18869 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18870 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
18872 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
18873 (int16x8_t) __a);
18876 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18877 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
18879 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
18880 (int32x4_t) __a);
18883 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18884 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
18886 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
18887 (int64x2_t) __a);
18890 /* vcle - scalar. */
18892 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18893 vcles_f32 (float32_t __a, float32_t __b)
18895 return __a <= __b ? -1 : 0;
18898 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18899 vcled_s64 (int64x1_t __a, int64x1_t __b)
18901 return __a <= __b ? -1ll : 0ll;
18904 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18905 vcled_u64 (uint64x1_t __a, uint64x1_t __b)
18907 return __a <= __b ? -1ll : 0ll;
18910 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
18911 vcled_f64 (float64_t __a, float64_t __b)
18913 return __a <= __b ? -1ll : 0ll;
18916 /* vclez - vector. */
18918 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18919 vclez_f32 (float32x2_t __a)
18921 float32x2_t __b = {0.0f, 0.0f};
18922 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
18925 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18926 vclez_f64 (float64x1_t __a)
18928 return __a <= 0.0 ? -1ll : 0ll;
18931 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18932 vclez_p8 (poly8x8_t __a)
18934 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18935 return (uint8x8_t) __builtin_aarch64_cmlev8qi ((int8x8_t) __a,
18936 (int8x8_t) __b);
18939 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18940 vclez_s8 (int8x8_t __a)
18942 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
18943 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
18946 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18947 vclez_s16 (int16x4_t __a)
18949 int16x4_t __b = {0, 0, 0, 0};
18950 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
18953 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18954 vclez_s32 (int32x2_t __a)
18956 int32x2_t __b = {0, 0};
18957 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
18960 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18961 vclez_s64 (int64x1_t __a)
18963 return __a <= 0ll ? -1ll : 0ll;
18966 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18967 vclez_u64 (uint64x1_t __a)
18969 return __a <= 0ll ? -1ll : 0ll;
18972 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18973 vclezq_f32 (float32x4_t __a)
18975 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
18976 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
18979 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18980 vclezq_f64 (float64x2_t __a)
18982 float64x2_t __b = {0.0, 0.0};
18983 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
18986 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18987 vclezq_p8 (poly8x16_t __a)
18989 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18990 0, 0, 0, 0, 0, 0, 0, 0};
18991 return (uint8x16_t) __builtin_aarch64_cmlev16qi ((int8x16_t) __a,
18992 (int8x16_t) __b);
18995 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18996 vclezq_s8 (int8x16_t __a)
18998 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
18999 0, 0, 0, 0, 0, 0, 0, 0};
19000 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
19003 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19004 vclezq_s16 (int16x8_t __a)
19006 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
19007 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
19010 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19011 vclezq_s32 (int32x4_t __a)
19013 int32x4_t __b = {0, 0, 0, 0};
19014 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
19017 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19018 vclezq_s64 (int64x2_t __a)
19020 int64x2_t __b = {0, 0};
19021 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
19024 /* vclez - scalar. */
19026 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19027 vclezs_f32 (float32_t __a)
19029 return __a <= 0.0f ? -1 : 0;
19032 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19033 vclezd_s64 (int64x1_t __a)
19035 return __a <= 0 ? -1ll : 0ll;
19038 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19039 vclezd_u64 (int64x1_t __a)
19041 return __a <= 0 ? -1ll : 0ll;
19044 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19045 vclezd_f64 (float64_t __a)
19047 return __a <= 0.0 ? -1ll : 0ll;
19050 /* vclt - vector. */
19052 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19053 vclt_f32 (float32x2_t __a, float32x2_t __b)
19055 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
19058 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19059 vclt_f64 (float64x1_t __a, float64x1_t __b)
19061 return __a < __b ? -1ll : 0ll;
19064 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19065 vclt_p8 (poly8x8_t __a, poly8x8_t __b)
19067 return (uint8x8_t) __builtin_aarch64_cmgtv8qi ((int8x8_t) __b,
19068 (int8x8_t) __a);
19071 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19072 vclt_s8 (int8x8_t __a, int8x8_t __b)
19074 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
19077 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19078 vclt_s16 (int16x4_t __a, int16x4_t __b)
19080 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
19083 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19084 vclt_s32 (int32x2_t __a, int32x2_t __b)
19086 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
19089 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19090 vclt_s64 (int64x1_t __a, int64x1_t __b)
19092 return __a < __b ? -1ll : 0ll;
19095 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19096 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
19098 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
19099 (int8x8_t) __a);
19102 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19103 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
19105 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
19106 (int16x4_t) __a);
19109 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19110 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
19112 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
19113 (int32x2_t) __a);
19116 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19117 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
19119 return __a < __b ? -1ll : 0ll;
19122 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19123 vcltq_f32 (float32x4_t __a, float32x4_t __b)
19125 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
19128 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19129 vcltq_f64 (float64x2_t __a, float64x2_t __b)
19131 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
19134 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19135 vcltq_p8 (poly8x16_t __a, poly8x16_t __b)
19137 return (uint8x16_t) __builtin_aarch64_cmgtv16qi ((int8x16_t) __b,
19138 (int8x16_t) __a);
19141 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19142 vcltq_s8 (int8x16_t __a, int8x16_t __b)
19144 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
19147 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19148 vcltq_s16 (int16x8_t __a, int16x8_t __b)
19150 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
19153 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19154 vcltq_s32 (int32x4_t __a, int32x4_t __b)
19156 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
19159 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19160 vcltq_s64 (int64x2_t __a, int64x2_t __b)
19162 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
19165 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19166 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
19168 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
19169 (int8x16_t) __a);
19172 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19173 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
19175 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
19176 (int16x8_t) __a);
19179 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19180 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
19182 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
19183 (int32x4_t) __a);
19186 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19187 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
19189 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
19190 (int64x2_t) __a);
19193 /* vclt - scalar. */
19195 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19196 vclts_f32 (float32_t __a, float32_t __b)
19198 return __a < __b ? -1 : 0;
19201 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19202 vcltd_s64 (int64x1_t __a, int64x1_t __b)
19204 return __a < __b ? -1ll : 0ll;
19207 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19208 vcltd_u64 (uint64x1_t __a, uint64x1_t __b)
19210 return __a < __b ? -1ll : 0ll;
19213 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19214 vcltd_f64 (float64_t __a, float64_t __b)
19216 return __a < __b ? -1ll : 0ll;
19219 /* vcltz - vector. */
19221 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19222 vcltz_f32 (float32x2_t __a)
19224 float32x2_t __b = {0.0f, 0.0f};
19225 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
19228 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19229 vcltz_f64 (float64x1_t __a)
19231 return __a < 0.0 ? -1ll : 0ll;
19234 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19235 vcltz_p8 (poly8x8_t __a)
19237 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
19238 return (uint8x8_t) __builtin_aarch64_cmltv8qi ((int8x8_t) __a,
19239 (int8x8_t) __b);
19242 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19243 vcltz_s8 (int8x8_t __a)
19245 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
19246 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
19249 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19250 vcltz_s16 (int16x4_t __a)
19252 int16x4_t __b = {0, 0, 0, 0};
19253 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
19256 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19257 vcltz_s32 (int32x2_t __a)
19259 int32x2_t __b = {0, 0};
19260 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
19263 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19264 vcltz_s64 (int64x1_t __a)
19266 return __a < 0ll ? -1ll : 0ll;
19269 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19270 vcltzq_f32 (float32x4_t __a)
19272 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
19273 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
19276 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19277 vcltzq_f64 (float64x2_t __a)
19279 float64x2_t __b = {0.0, 0.0};
19280 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
19283 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19284 vcltzq_p8 (poly8x16_t __a)
19286 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
19287 0, 0, 0, 0, 0, 0, 0, 0};
19288 return (uint8x16_t) __builtin_aarch64_cmltv16qi ((int8x16_t) __a,
19289 (int8x16_t) __b);
19292 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19293 vcltzq_s8 (int8x16_t __a)
19295 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
19296 0, 0, 0, 0, 0, 0, 0, 0};
19297 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
19300 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19301 vcltzq_s16 (int16x8_t __a)
19303 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
19304 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
19307 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19308 vcltzq_s32 (int32x4_t __a)
19310 int32x4_t __b = {0, 0, 0, 0};
19311 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
19314 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19315 vcltzq_s64 (int64x2_t __a)
19317 int64x2_t __b = {0, 0};
19318 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
19321 /* vcltz - scalar. */
19323 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19324 vcltzs_f32 (float32_t __a)
19326 return __a < 0.0f ? -1 : 0;
19329 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19330 vcltzd_s64 (int64x1_t __a)
19332 return __a < 0 ? -1ll : 0ll;
19335 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19336 vcltzd_u64 (int64x1_t __a)
19338 return __a < 0 ? -1ll : 0ll;
19341 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19342 vcltzd_f64 (float64_t __a)
19344 return __a < 0.0 ? -1ll : 0ll;
19347 /* vcvt (double -> float). */
19349 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19350 vcvt_f32_f64 (float64x2_t __a)
19352 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
19355 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19356 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
19358 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
19361 /* vcvt (float -> double). */
19363 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19364 vcvt_f64_f32 (float32x2_t __a)
19367 return __builtin_aarch64_float_extend_lo_v2df (__a);
19370 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19371 vcvt_high_f64_f32 (float32x4_t __a)
19373 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
19376 /* vcvt (<u>int -> float) */
19378 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19379 vcvtd_f64_s64 (int64_t __a)
19381 return (float64_t) __a;
19384 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19385 vcvtd_f64_u64 (uint64_t __a)
19387 return (float64_t) __a;
19390 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19391 vcvts_f32_s32 (int32_t __a)
19393 return (float32_t) __a;
19396 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
19397 vcvts_f32_u32 (uint32_t __a)
19399 return (float32_t) __a;
19402 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19403 vcvt_f32_s32 (int32x2_t __a)
19405 return __builtin_aarch64_floatv2siv2sf (__a);
19408 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19409 vcvt_f32_u32 (uint32x2_t __a)
19411 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
19414 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19415 vcvtq_f32_s32 (int32x4_t __a)
19417 return __builtin_aarch64_floatv4siv4sf (__a);
19420 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19421 vcvtq_f32_u32 (uint32x4_t __a)
19423 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
19426 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19427 vcvtq_f64_s64 (int64x2_t __a)
19429 return __builtin_aarch64_floatv2div2df (__a);
19432 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19433 vcvtq_f64_u64 (uint64x2_t __a)
19435 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
19438 /* vcvt (float -> <u>int) */
19440 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19441 vcvtd_s64_f64 (float64_t __a)
19443 return (int64_t) __a;
19446 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19447 vcvtd_u64_f64 (float64_t __a)
19449 return (uint64_t) __a;
19452 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19453 vcvts_s32_f32 (float32_t __a)
19455 return (int32_t) __a;
19458 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19459 vcvts_u32_f32 (float32_t __a)
19461 return (uint32_t) __a;
19464 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19465 vcvt_s32_f32 (float32x2_t __a)
19467 return __builtin_aarch64_lbtruncv2sfv2si (__a);
19470 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19471 vcvt_u32_f32 (float32x2_t __a)
19473 /* TODO: This cast should go away when builtins have
19474 their correct types. */
19475 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
19478 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19479 vcvtq_s32_f32 (float32x4_t __a)
19481 return __builtin_aarch64_lbtruncv4sfv4si (__a);
19484 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19485 vcvtq_u32_f32 (float32x4_t __a)
19487 /* TODO: This cast should go away when builtins have
19488 their correct types. */
19489 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
19492 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19493 vcvtq_s64_f64 (float64x2_t __a)
19495 return __builtin_aarch64_lbtruncv2dfv2di (__a);
19498 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19499 vcvtq_u64_f64 (float64x2_t __a)
19501 /* TODO: This cast should go away when builtins have
19502 their correct types. */
19503 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
19506 /* vcvta */
19508 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19509 vcvtad_s64_f64 (float64_t __a)
19511 return __builtin_aarch64_lrounddfdi (__a);
19514 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19515 vcvtad_u64_f64 (float64_t __a)
19517 return __builtin_aarch64_lroundudfdi (__a);
19520 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19521 vcvtas_s32_f32 (float32_t __a)
19523 return __builtin_aarch64_lroundsfsi (__a);
19526 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19527 vcvtas_u32_f32 (float32_t __a)
19529 return __builtin_aarch64_lroundusfsi (__a);
19532 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19533 vcvta_s32_f32 (float32x2_t __a)
19535 return __builtin_aarch64_lroundv2sfv2si (__a);
19538 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19539 vcvta_u32_f32 (float32x2_t __a)
19541 /* TODO: This cast should go away when builtins have
19542 their correct types. */
19543 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
19546 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19547 vcvtaq_s32_f32 (float32x4_t __a)
19549 return __builtin_aarch64_lroundv4sfv4si (__a);
19552 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19553 vcvtaq_u32_f32 (float32x4_t __a)
19555 /* TODO: This cast should go away when builtins have
19556 their correct types. */
19557 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
19560 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19561 vcvtaq_s64_f64 (float64x2_t __a)
19563 return __builtin_aarch64_lroundv2dfv2di (__a);
19566 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19567 vcvtaq_u64_f64 (float64x2_t __a)
19569 /* TODO: This cast should go away when builtins have
19570 their correct types. */
19571 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
19574 /* vcvtm */
19576 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19577 vcvtmd_s64_f64 (float64_t __a)
19579 return __builtin_lfloor (__a);
19582 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19583 vcvtmd_u64_f64 (float64_t __a)
19585 return __builtin_aarch64_lfloorudfdi (__a);
19588 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19589 vcvtms_s32_f32 (float32_t __a)
19591 return __builtin_ifloorf (__a);
19594 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19595 vcvtms_u32_f32 (float32_t __a)
19597 return __builtin_aarch64_lfloorusfsi (__a);
19600 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19601 vcvtm_s32_f32 (float32x2_t __a)
19603 return __builtin_aarch64_lfloorv2sfv2si (__a);
19606 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19607 vcvtm_u32_f32 (float32x2_t __a)
19609 /* TODO: This cast should go away when builtins have
19610 their correct types. */
19611 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
19614 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19615 vcvtmq_s32_f32 (float32x4_t __a)
19617 return __builtin_aarch64_lfloorv4sfv4si (__a);
19620 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19621 vcvtmq_u32_f32 (float32x4_t __a)
19623 /* TODO: This cast should go away when builtins have
19624 their correct types. */
19625 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
19628 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19629 vcvtmq_s64_f64 (float64x2_t __a)
19631 return __builtin_aarch64_lfloorv2dfv2di (__a);
19634 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19635 vcvtmq_u64_f64 (float64x2_t __a)
19637 /* TODO: This cast should go away when builtins have
19638 their correct types. */
19639 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
19642 /* vcvtn */
19644 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19645 vcvtnd_s64_f64 (float64_t __a)
19647 return __builtin_aarch64_lfrintndfdi (__a);
19650 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19651 vcvtnd_u64_f64 (float64_t __a)
19653 return __builtin_aarch64_lfrintnudfdi (__a);
19656 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19657 vcvtns_s32_f32 (float32_t __a)
19659 return __builtin_aarch64_lfrintnsfsi (__a);
19662 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19663 vcvtns_u32_f32 (float32_t __a)
19665 return __builtin_aarch64_lfrintnusfsi (__a);
19668 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19669 vcvtn_s32_f32 (float32x2_t __a)
19671 return __builtin_aarch64_lfrintnv2sfv2si (__a);
19674 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19675 vcvtn_u32_f32 (float32x2_t __a)
19677 /* TODO: This cast should go away when builtins have
19678 their correct types. */
19679 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
19682 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19683 vcvtnq_s32_f32 (float32x4_t __a)
19685 return __builtin_aarch64_lfrintnv4sfv4si (__a);
19688 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19689 vcvtnq_u32_f32 (float32x4_t __a)
19691 /* TODO: This cast should go away when builtins have
19692 their correct types. */
19693 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
19696 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19697 vcvtnq_s64_f64 (float64x2_t __a)
19699 return __builtin_aarch64_lfrintnv2dfv2di (__a);
19702 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19703 vcvtnq_u64_f64 (float64x2_t __a)
19705 /* TODO: This cast should go away when builtins have
19706 their correct types. */
19707 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
19710 /* vcvtp */
19712 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19713 vcvtpd_s64_f64 (float64_t __a)
19715 return __builtin_lceil (__a);
19718 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19719 vcvtpd_u64_f64 (float64_t __a)
19721 return __builtin_aarch64_lceiludfdi (__a);
19724 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19725 vcvtps_s32_f32 (float32_t __a)
19727 return __builtin_iceilf (__a);
19730 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
19731 vcvtps_u32_f32 (float32_t __a)
19733 return __builtin_aarch64_lceilusfsi (__a);
19736 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19737 vcvtp_s32_f32 (float32x2_t __a)
19739 return __builtin_aarch64_lceilv2sfv2si (__a);
19742 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19743 vcvtp_u32_f32 (float32x2_t __a)
19745 /* TODO: This cast should go away when builtins have
19746 their correct types. */
19747 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
19750 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19751 vcvtpq_s32_f32 (float32x4_t __a)
19753 return __builtin_aarch64_lceilv4sfv4si (__a);
19756 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19757 vcvtpq_u32_f32 (float32x4_t __a)
19759 /* TODO: This cast should go away when builtins have
19760 their correct types. */
19761 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
19764 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19765 vcvtpq_s64_f64 (float64x2_t __a)
19767 return __builtin_aarch64_lceilv2dfv2di (__a);
19770 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19771 vcvtpq_u64_f64 (float64x2_t __a)
19773 /* TODO: This cast should go away when builtins have
19774 their correct types. */
19775 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
19778 /* vdup */
19780 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19781 vdupb_lane_s8 (int8x16_t a, int const b)
19783 return __aarch64_vgetq_lane_s8 (a, b);
19786 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
19787 vdupb_lane_u8 (uint8x16_t a, int const b)
19789 return __aarch64_vgetq_lane_u8 (a, b);
19792 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19793 vduph_lane_s16 (int16x8_t a, int const b)
19795 return __aarch64_vgetq_lane_s16 (a, b);
19798 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
19799 vduph_lane_u16 (uint16x8_t a, int const b)
19801 return __aarch64_vgetq_lane_u16 (a, b);
19804 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19805 vdups_lane_s32 (int32x4_t a, int const b)
19807 return __aarch64_vgetq_lane_s32 (a, b);
19810 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
19811 vdups_lane_u32 (uint32x4_t a, int const b)
19813 return __aarch64_vgetq_lane_u32 (a, b);
19816 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19817 vdupd_lane_s64 (int64x2_t a, int const b)
19819 return __aarch64_vgetq_lane_s64 (a, b);
19822 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19823 vdupd_lane_u64 (uint64x2_t a, int const b)
19825 return __aarch64_vgetq_lane_u64 (a, b);
19828 /* vld1 */
19830 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19831 vld1_f32 (const float32_t *a)
19833 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
19836 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19837 vld1_f64 (const float64_t *a)
19839 return *a;
19842 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
19843 vld1_p8 (const poly8_t *a)
19845 return (poly8x8_t)
19846 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
19849 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
19850 vld1_p16 (const poly16_t *a)
19852 return (poly16x4_t)
19853 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
19856 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19857 vld1_s8 (const int8_t *a)
19859 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
19862 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19863 vld1_s16 (const int16_t *a)
19865 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
19868 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19869 vld1_s32 (const int32_t *a)
19871 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
19874 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19875 vld1_s64 (const int64_t *a)
19877 return *a;
19880 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19881 vld1_u8 (const uint8_t *a)
19883 return (uint8x8_t)
19884 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
19887 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19888 vld1_u16 (const uint16_t *a)
19890 return (uint16x4_t)
19891 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
19894 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19895 vld1_u32 (const uint32_t *a)
19897 return (uint32x2_t)
19898 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
19901 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
19902 vld1_u64 (const uint64_t *a)
19904 return *a;
19907 /* vld1q */
19909 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19910 vld1q_f32 (const float32_t *a)
19912 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
19915 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19916 vld1q_f64 (const float64_t *a)
19918 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
19921 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
19922 vld1q_p8 (const poly8_t *a)
19924 return (poly8x16_t)
19925 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
19928 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
19929 vld1q_p16 (const poly16_t *a)
19931 return (poly16x8_t)
19932 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
19935 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19936 vld1q_s8 (const int8_t *a)
19938 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
19941 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19942 vld1q_s16 (const int16_t *a)
19944 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
19947 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19948 vld1q_s32 (const int32_t *a)
19950 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
19953 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19954 vld1q_s64 (const int64_t *a)
19956 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
19959 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
19960 vld1q_u8 (const uint8_t *a)
19962 return (uint8x16_t)
19963 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
19966 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19967 vld1q_u16 (const uint16_t *a)
19969 return (uint16x8_t)
19970 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
19973 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19974 vld1q_u32 (const uint32_t *a)
19976 return (uint32x4_t)
19977 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
19980 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
19981 vld1q_u64 (const uint64_t *a)
19983 return (uint64x2_t)
19984 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
19987 /* vldn */
19989 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
19990 vld2_s64 (const int64_t * __a)
19992 int64x1x2_t ret;
19993 __builtin_aarch64_simd_oi __o;
19994 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
19995 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
19996 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
19997 return ret;
20000 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
20001 vld2_u64 (const uint64_t * __a)
20003 uint64x1x2_t ret;
20004 __builtin_aarch64_simd_oi __o;
20005 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
20006 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
20007 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
20008 return ret;
20011 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
20012 vld2_f64 (const float64_t * __a)
20014 float64x1x2_t ret;
20015 __builtin_aarch64_simd_oi __o;
20016 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
20017 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 0);
20018 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregoidf (__o, 1);
20019 return ret;
20022 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
20023 vld2_s8 (const int8_t * __a)
20025 int8x8x2_t ret;
20026 __builtin_aarch64_simd_oi __o;
20027 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
20028 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
20029 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
20030 return ret;
20033 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
20034 vld2_p8 (const poly8_t * __a)
20036 poly8x8x2_t ret;
20037 __builtin_aarch64_simd_oi __o;
20038 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
20039 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
20040 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
20041 return ret;
20044 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
20045 vld2_s16 (const int16_t * __a)
20047 int16x4x2_t ret;
20048 __builtin_aarch64_simd_oi __o;
20049 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20050 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20051 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20052 return ret;
20055 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
20056 vld2_p16 (const poly16_t * __a)
20058 poly16x4x2_t ret;
20059 __builtin_aarch64_simd_oi __o;
20060 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20061 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20062 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20063 return ret;
20066 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
20067 vld2_s32 (const int32_t * __a)
20069 int32x2x2_t ret;
20070 __builtin_aarch64_simd_oi __o;
20071 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
20072 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
20073 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
20074 return ret;
20077 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
20078 vld2_u8 (const uint8_t * __a)
20080 uint8x8x2_t ret;
20081 __builtin_aarch64_simd_oi __o;
20082 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
20083 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
20084 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
20085 return ret;
20088 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
20089 vld2_u16 (const uint16_t * __a)
20091 uint16x4x2_t ret;
20092 __builtin_aarch64_simd_oi __o;
20093 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
20094 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
20095 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
20096 return ret;
20099 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
20100 vld2_u32 (const uint32_t * __a)
20102 uint32x2x2_t ret;
20103 __builtin_aarch64_simd_oi __o;
20104 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
20105 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
20106 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
20107 return ret;
20110 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
20111 vld2_f32 (const float32_t * __a)
20113 float32x2x2_t ret;
20114 __builtin_aarch64_simd_oi __o;
20115 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
20116 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
20117 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
20118 return ret;
20121 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
20122 vld2q_s8 (const int8_t * __a)
20124 int8x16x2_t ret;
20125 __builtin_aarch64_simd_oi __o;
20126 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20127 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20128 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20129 return ret;
20132 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
20133 vld2q_p8 (const poly8_t * __a)
20135 poly8x16x2_t ret;
20136 __builtin_aarch64_simd_oi __o;
20137 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20138 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20139 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20140 return ret;
20143 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
20144 vld2q_s16 (const int16_t * __a)
20146 int16x8x2_t ret;
20147 __builtin_aarch64_simd_oi __o;
20148 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20149 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20150 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20151 return ret;
20154 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
20155 vld2q_p16 (const poly16_t * __a)
20157 poly16x8x2_t ret;
20158 __builtin_aarch64_simd_oi __o;
20159 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20160 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20161 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20162 return ret;
20165 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
20166 vld2q_s32 (const int32_t * __a)
20168 int32x4x2_t ret;
20169 __builtin_aarch64_simd_oi __o;
20170 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
20171 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
20172 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
20173 return ret;
20176 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
20177 vld2q_s64 (const int64_t * __a)
20179 int64x2x2_t ret;
20180 __builtin_aarch64_simd_oi __o;
20181 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
20182 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
20183 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
20184 return ret;
20187 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
20188 vld2q_u8 (const uint8_t * __a)
20190 uint8x16x2_t ret;
20191 __builtin_aarch64_simd_oi __o;
20192 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
20193 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
20194 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
20195 return ret;
20198 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
20199 vld2q_u16 (const uint16_t * __a)
20201 uint16x8x2_t ret;
20202 __builtin_aarch64_simd_oi __o;
20203 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
20204 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
20205 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
20206 return ret;
20209 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
20210 vld2q_u32 (const uint32_t * __a)
20212 uint32x4x2_t ret;
20213 __builtin_aarch64_simd_oi __o;
20214 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
20215 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
20216 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
20217 return ret;
20220 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
20221 vld2q_u64 (const uint64_t * __a)
20223 uint64x2x2_t ret;
20224 __builtin_aarch64_simd_oi __o;
20225 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
20226 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
20227 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
20228 return ret;
20231 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
20232 vld2q_f32 (const float32_t * __a)
20234 float32x4x2_t ret;
20235 __builtin_aarch64_simd_oi __o;
20236 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
20237 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
20238 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
20239 return ret;
20242 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
20243 vld2q_f64 (const float64_t * __a)
20245 float64x2x2_t ret;
20246 __builtin_aarch64_simd_oi __o;
20247 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
20248 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
20249 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
20250 return ret;
20253 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
20254 vld3_s64 (const int64_t * __a)
20256 int64x1x3_t ret;
20257 __builtin_aarch64_simd_ci __o;
20258 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20259 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20260 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20261 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20262 return ret;
20265 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
20266 vld3_u64 (const uint64_t * __a)
20268 uint64x1x3_t ret;
20269 __builtin_aarch64_simd_ci __o;
20270 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
20271 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
20272 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
20273 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
20274 return ret;
20277 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
20278 vld3_f64 (const float64_t * __a)
20280 float64x1x3_t ret;
20281 __builtin_aarch64_simd_ci __o;
20282 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
20283 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 0);
20284 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 1);
20285 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregcidf (__o, 2);
20286 return ret;
20289 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
20290 vld3_s8 (const int8_t * __a)
20292 int8x8x3_t ret;
20293 __builtin_aarch64_simd_ci __o;
20294 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20295 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20296 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20297 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20298 return ret;
20301 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
20302 vld3_p8 (const poly8_t * __a)
20304 poly8x8x3_t ret;
20305 __builtin_aarch64_simd_ci __o;
20306 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20307 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20308 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20309 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20310 return ret;
20313 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
20314 vld3_s16 (const int16_t * __a)
20316 int16x4x3_t ret;
20317 __builtin_aarch64_simd_ci __o;
20318 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20319 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20320 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20321 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20322 return ret;
20325 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
20326 vld3_p16 (const poly16_t * __a)
20328 poly16x4x3_t ret;
20329 __builtin_aarch64_simd_ci __o;
20330 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20331 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20332 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20333 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20334 return ret;
20337 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
20338 vld3_s32 (const int32_t * __a)
20340 int32x2x3_t ret;
20341 __builtin_aarch64_simd_ci __o;
20342 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20343 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20344 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20345 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20346 return ret;
20349 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
20350 vld3_u8 (const uint8_t * __a)
20352 uint8x8x3_t ret;
20353 __builtin_aarch64_simd_ci __o;
20354 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
20355 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
20356 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
20357 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
20358 return ret;
20361 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
20362 vld3_u16 (const uint16_t * __a)
20364 uint16x4x3_t ret;
20365 __builtin_aarch64_simd_ci __o;
20366 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
20367 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
20368 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
20369 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
20370 return ret;
20373 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
20374 vld3_u32 (const uint32_t * __a)
20376 uint32x2x3_t ret;
20377 __builtin_aarch64_simd_ci __o;
20378 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
20379 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
20380 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
20381 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
20382 return ret;
20385 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
20386 vld3_f32 (const float32_t * __a)
20388 float32x2x3_t ret;
20389 __builtin_aarch64_simd_ci __o;
20390 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
20391 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
20392 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
20393 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
20394 return ret;
20397 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
20398 vld3q_s8 (const int8_t * __a)
20400 int8x16x3_t ret;
20401 __builtin_aarch64_simd_ci __o;
20402 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20403 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20404 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20405 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20406 return ret;
20409 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
20410 vld3q_p8 (const poly8_t * __a)
20412 poly8x16x3_t ret;
20413 __builtin_aarch64_simd_ci __o;
20414 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20415 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20416 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20417 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20418 return ret;
20421 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
20422 vld3q_s16 (const int16_t * __a)
20424 int16x8x3_t ret;
20425 __builtin_aarch64_simd_ci __o;
20426 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20427 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20428 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20429 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20430 return ret;
20433 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
20434 vld3q_p16 (const poly16_t * __a)
20436 poly16x8x3_t ret;
20437 __builtin_aarch64_simd_ci __o;
20438 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20439 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20440 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20441 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20442 return ret;
20445 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
20446 vld3q_s32 (const int32_t * __a)
20448 int32x4x3_t ret;
20449 __builtin_aarch64_simd_ci __o;
20450 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20451 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20452 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20453 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20454 return ret;
20457 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
20458 vld3q_s64 (const int64_t * __a)
20460 int64x2x3_t ret;
20461 __builtin_aarch64_simd_ci __o;
20462 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20463 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20464 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20465 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20466 return ret;
20469 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
20470 vld3q_u8 (const uint8_t * __a)
20472 uint8x16x3_t ret;
20473 __builtin_aarch64_simd_ci __o;
20474 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
20475 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
20476 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
20477 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
20478 return ret;
20481 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
20482 vld3q_u16 (const uint16_t * __a)
20484 uint16x8x3_t ret;
20485 __builtin_aarch64_simd_ci __o;
20486 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
20487 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
20488 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
20489 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
20490 return ret;
20493 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
20494 vld3q_u32 (const uint32_t * __a)
20496 uint32x4x3_t ret;
20497 __builtin_aarch64_simd_ci __o;
20498 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
20499 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
20500 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
20501 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
20502 return ret;
20505 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
20506 vld3q_u64 (const uint64_t * __a)
20508 uint64x2x3_t ret;
20509 __builtin_aarch64_simd_ci __o;
20510 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
20511 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
20512 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
20513 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
20514 return ret;
20517 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
20518 vld3q_f32 (const float32_t * __a)
20520 float32x4x3_t ret;
20521 __builtin_aarch64_simd_ci __o;
20522 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
20523 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
20524 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
20525 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
20526 return ret;
20529 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
20530 vld3q_f64 (const float64_t * __a)
20532 float64x2x3_t ret;
20533 __builtin_aarch64_simd_ci __o;
20534 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
20535 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
20536 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
20537 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
20538 return ret;
20541 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
20542 vld4_s64 (const int64_t * __a)
20544 int64x1x4_t ret;
20545 __builtin_aarch64_simd_xi __o;
20546 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20547 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20548 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20549 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20550 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20551 return ret;
20554 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
20555 vld4_u64 (const uint64_t * __a)
20557 uint64x1x4_t ret;
20558 __builtin_aarch64_simd_xi __o;
20559 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
20560 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
20561 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
20562 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
20563 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
20564 return ret;
20567 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
20568 vld4_f64 (const float64_t * __a)
20570 float64x1x4_t ret;
20571 __builtin_aarch64_simd_xi __o;
20572 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
20573 ret.val[0] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 0);
20574 ret.val[1] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 1);
20575 ret.val[2] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 2);
20576 ret.val[3] = (float64x1_t) __builtin_aarch64_get_dregxidf (__o, 3);
20577 return ret;
20580 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
20581 vld4_s8 (const int8_t * __a)
20583 int8x8x4_t ret;
20584 __builtin_aarch64_simd_xi __o;
20585 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20586 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20587 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20588 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20589 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20590 return ret;
20593 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
20594 vld4_p8 (const poly8_t * __a)
20596 poly8x8x4_t ret;
20597 __builtin_aarch64_simd_xi __o;
20598 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20599 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20600 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20601 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20602 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20603 return ret;
20606 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
20607 vld4_s16 (const int16_t * __a)
20609 int16x4x4_t ret;
20610 __builtin_aarch64_simd_xi __o;
20611 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20612 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20613 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20614 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20615 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20616 return ret;
20619 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
20620 vld4_p16 (const poly16_t * __a)
20622 poly16x4x4_t ret;
20623 __builtin_aarch64_simd_xi __o;
20624 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20625 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20626 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20627 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20628 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20629 return ret;
20632 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
20633 vld4_s32 (const int32_t * __a)
20635 int32x2x4_t ret;
20636 __builtin_aarch64_simd_xi __o;
20637 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
20638 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
20639 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
20640 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
20641 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
20642 return ret;
20645 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
20646 vld4_u8 (const uint8_t * __a)
20648 uint8x8x4_t ret;
20649 __builtin_aarch64_simd_xi __o;
20650 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
20651 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
20652 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
20653 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
20654 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
20655 return ret;
20658 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
20659 vld4_u16 (const uint16_t * __a)
20661 uint16x4x4_t ret;
20662 __builtin_aarch64_simd_xi __o;
20663 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
20664 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
20665 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
20666 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
20667 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
20668 return ret;
20671 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
20672 vld4_u32 (const uint32_t * __a)
20674 uint32x2x4_t ret;
20675 __builtin_aarch64_simd_xi __o;
20676 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
20677 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
20678 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
20679 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
20680 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
20681 return ret;
20684 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
20685 vld4_f32 (const float32_t * __a)
20687 float32x2x4_t ret;
20688 __builtin_aarch64_simd_xi __o;
20689 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
20690 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
20691 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
20692 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
20693 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
20694 return ret;
20697 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
20698 vld4q_s8 (const int8_t * __a)
20700 int8x16x4_t ret;
20701 __builtin_aarch64_simd_xi __o;
20702 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20703 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20704 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20705 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20706 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20707 return ret;
20710 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
20711 vld4q_p8 (const poly8_t * __a)
20713 poly8x16x4_t ret;
20714 __builtin_aarch64_simd_xi __o;
20715 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20716 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20717 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20718 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20719 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20720 return ret;
20723 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
20724 vld4q_s16 (const int16_t * __a)
20726 int16x8x4_t ret;
20727 __builtin_aarch64_simd_xi __o;
20728 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20729 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20730 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20731 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20732 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20733 return ret;
20736 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
20737 vld4q_p16 (const poly16_t * __a)
20739 poly16x8x4_t ret;
20740 __builtin_aarch64_simd_xi __o;
20741 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20742 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20743 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20744 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20745 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20746 return ret;
20749 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
20750 vld4q_s32 (const int32_t * __a)
20752 int32x4x4_t ret;
20753 __builtin_aarch64_simd_xi __o;
20754 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
20755 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
20756 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
20757 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
20758 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
20759 return ret;
20762 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
20763 vld4q_s64 (const int64_t * __a)
20765 int64x2x4_t ret;
20766 __builtin_aarch64_simd_xi __o;
20767 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
20768 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
20769 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
20770 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
20771 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
20772 return ret;
20775 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
20776 vld4q_u8 (const uint8_t * __a)
20778 uint8x16x4_t ret;
20779 __builtin_aarch64_simd_xi __o;
20780 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
20781 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
20782 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
20783 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
20784 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
20785 return ret;
20788 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
20789 vld4q_u16 (const uint16_t * __a)
20791 uint16x8x4_t ret;
20792 __builtin_aarch64_simd_xi __o;
20793 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
20794 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
20795 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
20796 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
20797 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
20798 return ret;
20801 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
20802 vld4q_u32 (const uint32_t * __a)
20804 uint32x4x4_t ret;
20805 __builtin_aarch64_simd_xi __o;
20806 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
20807 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
20808 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
20809 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
20810 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
20811 return ret;
20814 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
20815 vld4q_u64 (const uint64_t * __a)
20817 uint64x2x4_t ret;
20818 __builtin_aarch64_simd_xi __o;
20819 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
20820 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
20821 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
20822 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
20823 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
20824 return ret;
20827 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
20828 vld4q_f32 (const float32_t * __a)
20830 float32x4x4_t ret;
20831 __builtin_aarch64_simd_xi __o;
20832 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
20833 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
20834 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
20835 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
20836 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
20837 return ret;
20840 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
20841 vld4q_f64 (const float64_t * __a)
20843 float64x2x4_t ret;
20844 __builtin_aarch64_simd_xi __o;
20845 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
20846 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
20847 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
20848 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
20849 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
20850 return ret;
20853 /* vmax */
20855 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20856 vmax_f32 (float32x2_t __a, float32x2_t __b)
20858 return __builtin_aarch64_smax_nanv2sf (__a, __b);
20861 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20862 vmax_s8 (int8x8_t __a, int8x8_t __b)
20864 return __builtin_aarch64_smaxv8qi (__a, __b);
20867 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20868 vmax_s16 (int16x4_t __a, int16x4_t __b)
20870 return __builtin_aarch64_smaxv4hi (__a, __b);
20873 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
20874 vmax_s32 (int32x2_t __a, int32x2_t __b)
20876 return __builtin_aarch64_smaxv2si (__a, __b);
20879 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20880 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
20882 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
20883 (int8x8_t) __b);
20886 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20887 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
20889 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
20890 (int16x4_t) __b);
20893 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
20894 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
20896 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
20897 (int32x2_t) __b);
20900 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20901 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
20903 return __builtin_aarch64_smax_nanv4sf (__a, __b);
20906 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20907 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
20909 return __builtin_aarch64_smax_nanv2df (__a, __b);
20912 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20913 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
20915 return __builtin_aarch64_smaxv16qi (__a, __b);
20918 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20919 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
20921 return __builtin_aarch64_smaxv8hi (__a, __b);
20924 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20925 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
20927 return __builtin_aarch64_smaxv4si (__a, __b);
20930 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20931 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
20933 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
20934 (int8x16_t) __b);
20937 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20938 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
20940 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
20941 (int16x8_t) __b);
20944 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20945 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
20947 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
20948 (int32x4_t) __b);
20951 /* vmaxnm */
20953 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20954 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
20956 return __builtin_aarch64_smaxv2sf (__a, __b);
20959 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
20960 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
20962 return __builtin_aarch64_smaxv4sf (__a, __b);
20965 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
20966 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
20968 return __builtin_aarch64_smaxv2df (__a, __b);
20971 /* vmaxv */
20973 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
20974 vmaxv_f32 (float32x2_t __a)
20976 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0);
20979 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
20980 vmaxv_s8 (int8x8_t __a)
20982 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
20985 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
20986 vmaxv_s16 (int16x4_t __a)
20988 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
20991 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
20992 vmaxv_s32 (int32x2_t __a)
20994 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
20997 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
20998 vmaxv_u8 (uint8x8_t __a)
21000 return vget_lane_u8 ((uint8x8_t)
21001 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0);
21004 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21005 vmaxv_u16 (uint16x4_t __a)
21007 return vget_lane_u16 ((uint16x4_t)
21008 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0);
21011 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21012 vmaxv_u32 (uint32x2_t __a)
21014 return vget_lane_u32 ((uint32x2_t)
21015 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0);
21018 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21019 vmaxvq_f32 (float32x4_t __a)
21021 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0);
21024 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21025 vmaxvq_f64 (float64x2_t __a)
21027 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0);
21030 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21031 vmaxvq_s8 (int8x16_t __a)
21033 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
21036 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21037 vmaxvq_s16 (int16x8_t __a)
21039 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
21042 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21043 vmaxvq_s32 (int32x4_t __a)
21045 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
21048 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21049 vmaxvq_u8 (uint8x16_t __a)
21051 return vgetq_lane_u8 ((uint8x16_t)
21052 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0);
21055 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21056 vmaxvq_u16 (uint16x8_t __a)
21058 return vgetq_lane_u16 ((uint16x8_t)
21059 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0);
21062 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21063 vmaxvq_u32 (uint32x4_t __a)
21065 return vgetq_lane_u32 ((uint32x4_t)
21066 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0);
21069 /* vmaxnmv */
21071 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21072 vmaxnmv_f32 (float32x2_t __a)
21074 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0);
21077 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21078 vmaxnmvq_f32 (float32x4_t __a)
21080 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
21083 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21084 vmaxnmvq_f64 (float64x2_t __a)
21086 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
21089 /* vmin */
21091 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21092 vmin_f32 (float32x2_t __a, float32x2_t __b)
21094 return __builtin_aarch64_smin_nanv2sf (__a, __b);
21097 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21098 vmin_s8 (int8x8_t __a, int8x8_t __b)
21100 return __builtin_aarch64_sminv8qi (__a, __b);
21103 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21104 vmin_s16 (int16x4_t __a, int16x4_t __b)
21106 return __builtin_aarch64_sminv4hi (__a, __b);
21109 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21110 vmin_s32 (int32x2_t __a, int32x2_t __b)
21112 return __builtin_aarch64_sminv2si (__a, __b);
21115 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21116 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
21118 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
21119 (int8x8_t) __b);
21122 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21123 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
21125 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
21126 (int16x4_t) __b);
21129 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21130 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
21132 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
21133 (int32x2_t) __b);
21136 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21137 vminq_f32 (float32x4_t __a, float32x4_t __b)
21139 return __builtin_aarch64_smin_nanv4sf (__a, __b);
21142 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21143 vminq_f64 (float64x2_t __a, float64x2_t __b)
21145 return __builtin_aarch64_smin_nanv2df (__a, __b);
21148 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21149 vminq_s8 (int8x16_t __a, int8x16_t __b)
21151 return __builtin_aarch64_sminv16qi (__a, __b);
21154 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21155 vminq_s16 (int16x8_t __a, int16x8_t __b)
21157 return __builtin_aarch64_sminv8hi (__a, __b);
21160 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21161 vminq_s32 (int32x4_t __a, int32x4_t __b)
21163 return __builtin_aarch64_sminv4si (__a, __b);
21166 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21167 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
21169 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
21170 (int8x16_t) __b);
21173 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21174 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
21176 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
21177 (int16x8_t) __b);
21180 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21181 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
21183 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
21184 (int32x4_t) __b);
21187 /* vminnm */
21189 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21190 vminnm_f32 (float32x2_t __a, float32x2_t __b)
21192 return __builtin_aarch64_sminv2sf (__a, __b);
21195 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21196 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
21198 return __builtin_aarch64_sminv4sf (__a, __b);
21201 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21202 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
21204 return __builtin_aarch64_sminv2df (__a, __b);
21207 /* vminv */
21209 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21210 vminv_f32 (float32x2_t __a)
21212 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0);
21215 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21216 vminv_s8 (int8x8_t __a)
21218 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0);
21221 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21222 vminv_s16 (int16x4_t __a)
21224 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
21227 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21228 vminv_s32 (int32x2_t __a)
21230 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
21233 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21234 vminv_u8 (uint8x8_t __a)
21236 return vget_lane_u8 ((uint8x8_t)
21237 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0);
21240 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21241 vminv_u16 (uint16x4_t __a)
21243 return vget_lane_u16 ((uint16x4_t)
21244 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0);
21247 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21248 vminv_u32 (uint32x2_t __a)
21250 return vget_lane_u32 ((uint32x2_t)
21251 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0);
21254 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21255 vminvq_f32 (float32x4_t __a)
21257 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0);
21260 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21261 vminvq_f64 (float64x2_t __a)
21263 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0);
21266 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
21267 vminvq_s8 (int8x16_t __a)
21269 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
21272 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
21273 vminvq_s16 (int16x8_t __a)
21275 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
21278 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
21279 vminvq_s32 (int32x4_t __a)
21281 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
21284 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
21285 vminvq_u8 (uint8x16_t __a)
21287 return vgetq_lane_u8 ((uint8x16_t)
21288 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0);
21291 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
21292 vminvq_u16 (uint16x8_t __a)
21294 return vgetq_lane_u16 ((uint16x8_t)
21295 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0);
21298 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
21299 vminvq_u32 (uint32x4_t __a)
21301 return vgetq_lane_u32 ((uint32x4_t)
21302 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0);
21305 /* vminnmv */
21307 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21308 vminnmv_f32 (float32x2_t __a)
21310 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
21313 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
21314 vminnmvq_f32 (float32x4_t __a)
21316 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
21319 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
21320 vminnmvq_f64 (float64x2_t __a)
21322 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
21325 /* vmla */
21327 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21328 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21330 return a + b * c;
21333 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21334 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21336 return a + b * c;
21339 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21340 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21342 return a + b * c;
21345 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
21346 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
21348 return a - b * c;
21351 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21352 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
21354 return a - b * c;
21357 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
21358 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
21360 return a - b * c;
21363 /* vqabs */
21365 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21366 vqabsq_s64 (int64x2_t __a)
21368 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
21371 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21372 vqabsb_s8 (int8x1_t __a)
21374 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
21377 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21378 vqabsh_s16 (int16x1_t __a)
21380 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
21383 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21384 vqabss_s32 (int32x1_t __a)
21386 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
21389 /* vqadd */
21391 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21392 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
21394 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
21397 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21398 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
21400 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
21403 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21404 vqadds_s32 (int32x1_t __a, int32x1_t __b)
21406 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
21409 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21410 vqaddd_s64 (int64x1_t __a, int64x1_t __b)
21412 return (int64x1_t) __builtin_aarch64_sqadddi (__a, __b);
21415 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21416 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
21418 return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b);
21421 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21422 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
21424 return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b);
21427 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21428 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
21430 return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b);
21433 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
21434 vqaddd_u64 (uint64x1_t __a, uint64x1_t __b)
21436 return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b);
21439 /* vqdmlal */
21441 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21442 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21444 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
21447 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21448 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21450 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
21453 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21454 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21455 int const __d)
21457 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
21460 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21461 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21462 int const __d)
21464 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
21467 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21468 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21470 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
21473 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21474 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21476 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
21477 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __tmp, __d);
21480 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21481 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21483 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
21486 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21487 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21489 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
21492 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21493 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21495 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
21498 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21499 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21501 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
21504 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21505 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21506 int const __d)
21508 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
21511 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21512 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21513 int const __d)
21515 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
21518 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21519 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21521 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
21524 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21525 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21527 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
21528 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __tmp, __d);
21531 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21532 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21534 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
21537 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21538 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21540 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
21543 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21544 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21546 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
21549 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21550 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21552 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
21555 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21556 vqdmlals_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21558 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
21561 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21562 vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21564 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
21567 /* vqdmlsl */
21569 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21570 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
21572 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
21575 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21576 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
21578 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
21581 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21582 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21583 int const __d)
21585 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
21588 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21589 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
21590 int const __d)
21592 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
21595 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21596 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
21598 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
21601 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21602 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
21604 int16x8_t __tmp = vcombine_s16 (__c, vcreate_s16 (INT64_C (0)));
21605 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __tmp, __d);
21608 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21609 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
21611 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
21614 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21615 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
21617 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
21620 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21621 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
21623 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
21626 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21627 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
21629 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
21632 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21633 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21634 int const __d)
21636 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
21639 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21640 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
21641 int const __d)
21643 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
21646 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21647 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
21649 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
21652 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21653 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
21655 int32x4_t __tmp = vcombine_s32 (__c, vcreate_s32 (INT64_C (0)));
21656 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __tmp, __d);
21659 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21660 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
21662 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
21665 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21666 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
21668 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
21671 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21672 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
21674 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
21677 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21678 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x8_t __c, const int __d)
21680 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
21683 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21684 vqdmlsls_s32 (int64x1_t __a, int32x1_t __b, int32x1_t __c)
21686 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
21689 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21690 vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)
21692 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
21695 /* vqdmulh */
21697 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21698 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
21700 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
21703 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21704 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
21706 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
21709 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21710 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
21712 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
21715 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21716 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
21718 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
21721 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21722 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
21724 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
21727 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21728 vqdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21730 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
21733 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21734 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
21736 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
21739 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21740 vqdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21742 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
21745 /* vqdmull */
21747 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21748 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
21750 return __builtin_aarch64_sqdmullv4hi (__a, __b);
21753 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21754 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
21756 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
21759 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21760 vqdmull_high_lane_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21762 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
21765 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21766 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
21768 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
21771 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21772 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
21774 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
21777 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21778 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
21780 int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
21781 return __builtin_aarch64_sqdmull_lanev4hi (__a, __tmp, __c);
21784 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21785 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
21787 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
21790 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21791 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
21793 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
21796 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21797 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
21799 return __builtin_aarch64_sqdmullv2si (__a, __b);
21802 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21803 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
21805 return __builtin_aarch64_sqdmull2v4si (__a, __b);
21808 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21809 vqdmull_high_lane_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21811 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
21814 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21815 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
21817 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
21820 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21821 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
21823 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
21826 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21827 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
21829 int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
21830 return __builtin_aarch64_sqdmull_lanev2si (__a, __tmp, __c);
21833 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21834 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
21836 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
21839 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21840 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
21842 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
21845 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21846 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
21848 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
21851 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21852 vqdmullh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
21854 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
21857 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21858 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
21860 return (int64x1_t) __builtin_aarch64_sqdmullsi (__a, __b);
21863 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
21864 vqdmulls_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
21866 return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c);
21869 /* vqmovn */
21871 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
21872 vqmovn_s16 (int16x8_t __a)
21874 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
21877 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
21878 vqmovn_s32 (int32x4_t __a)
21880 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
21883 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21884 vqmovn_s64 (int64x2_t __a)
21886 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
21889 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21890 vqmovn_u16 (uint16x8_t __a)
21892 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
21895 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21896 vqmovn_u32 (uint32x4_t __a)
21898 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
21901 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21902 vqmovn_u64 (uint64x2_t __a)
21904 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
21907 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21908 vqmovnh_s16 (int16x1_t __a)
21910 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
21913 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21914 vqmovns_s32 (int32x1_t __a)
21916 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
21919 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21920 vqmovnd_s64 (int64x1_t __a)
21922 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
21925 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
21926 vqmovnh_u16 (uint16x1_t __a)
21928 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
21931 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
21932 vqmovns_u32 (uint32x1_t __a)
21934 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
21937 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
21938 vqmovnd_u64 (uint64x1_t __a)
21940 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
21943 /* vqmovun */
21945 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21946 vqmovun_s16 (int16x8_t __a)
21948 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
21951 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21952 vqmovun_s32 (int32x4_t __a)
21954 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
21957 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21958 vqmovun_s64 (int64x2_t __a)
21960 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
21963 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21964 vqmovunh_s16 (int16x1_t __a)
21966 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
21969 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21970 vqmovuns_s32 (int32x1_t __a)
21972 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
21975 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
21976 vqmovund_s64 (int64x1_t __a)
21978 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
21981 /* vqneg */
21983 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
21984 vqnegq_s64 (int64x2_t __a)
21986 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
21989 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
21990 vqnegb_s8 (int8x1_t __a)
21992 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
21995 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
21996 vqnegh_s16 (int16x1_t __a)
21998 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
22001 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22002 vqnegs_s32 (int32x1_t __a)
22004 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
22007 /* vqrdmulh */
22009 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22010 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22012 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
22015 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22016 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22018 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
22021 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22022 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
22024 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
22027 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22028 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
22030 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
22033 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22034 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
22036 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
22039 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22040 vqrdmulhh_lane_s16 (int16x1_t __a, int16x8_t __b, const int __c)
22042 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
22045 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22046 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
22048 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
22051 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22052 vqrdmulhs_lane_s32 (int32x1_t __a, int32x4_t __b, const int __c)
22054 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
22057 /* vqrshl */
22059 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22060 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
22062 return __builtin_aarch64_sqrshlv8qi (__a, __b);
22065 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22066 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
22068 return __builtin_aarch64_sqrshlv4hi (__a, __b);
22071 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22072 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
22074 return __builtin_aarch64_sqrshlv2si (__a, __b);
22077 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22078 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
22080 return __builtin_aarch64_sqrshldi (__a, __b);
22083 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22084 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
22086 return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b);
22089 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22090 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
22092 return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b);
22095 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22096 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
22098 return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b);
22101 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22102 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
22104 return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b);
22107 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22108 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
22110 return __builtin_aarch64_sqrshlv16qi (__a, __b);
22113 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22114 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
22116 return __builtin_aarch64_sqrshlv8hi (__a, __b);
22119 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22120 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
22122 return __builtin_aarch64_sqrshlv4si (__a, __b);
22125 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22126 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
22128 return __builtin_aarch64_sqrshlv2di (__a, __b);
22131 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22132 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
22134 return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b);
22137 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22138 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
22140 return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b);
22143 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22144 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
22146 return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b);
22149 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22150 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
22152 return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b);
22155 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22156 vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
22158 return __builtin_aarch64_sqrshlqi (__a, __b);
22161 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22162 vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
22164 return __builtin_aarch64_sqrshlhi (__a, __b);
22167 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22168 vqrshls_s32 (int32x1_t __a, int32x1_t __b)
22170 return __builtin_aarch64_sqrshlsi (__a, __b);
22173 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22174 vqrshld_s64 (int64x1_t __a, int64x1_t __b)
22176 return __builtin_aarch64_sqrshldi (__a, __b);
22179 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22180 vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22182 return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b);
22185 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22186 vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22188 return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b);
22191 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22192 vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
22194 return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b);
22197 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22198 vqrshld_u64 (uint64x1_t __a, uint64x1_t __b)
22200 return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b);
22203 /* vqrshrn */
22205 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22206 vqrshrn_n_s16 (int16x8_t __a, const int __b)
22208 return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
22211 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22212 vqrshrn_n_s32 (int32x4_t __a, const int __b)
22214 return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
22217 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22218 vqrshrn_n_s64 (int64x2_t __a, const int __b)
22220 return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
22223 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22224 vqrshrn_n_u16 (uint16x8_t __a, const int __b)
22226 return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b);
22229 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22230 vqrshrn_n_u32 (uint32x4_t __a, const int __b)
22232 return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b);
22235 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22236 vqrshrn_n_u64 (uint64x2_t __a, const int __b)
22238 return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b);
22241 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22242 vqrshrnh_n_s16 (int16x1_t __a, const int __b)
22244 return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
22247 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22248 vqrshrns_n_s32 (int32x1_t __a, const int __b)
22250 return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
22253 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22254 vqrshrnd_n_s64 (int64x1_t __a, const int __b)
22256 return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
22259 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22260 vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
22262 return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b);
22265 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22266 vqrshrns_n_u32 (uint32x1_t __a, const int __b)
22268 return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b);
22271 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22272 vqrshrnd_n_u64 (uint64x1_t __a, const int __b)
22274 return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b);
22277 /* vqrshrun */
22279 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22280 vqrshrun_n_s16 (int16x8_t __a, const int __b)
22282 return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
22285 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22286 vqrshrun_n_s32 (int32x4_t __a, const int __b)
22288 return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
22291 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22292 vqrshrun_n_s64 (int64x2_t __a, const int __b)
22294 return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
22297 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22298 vqrshrunh_n_s16 (int16x1_t __a, const int __b)
22300 return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
22303 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22304 vqrshruns_n_s32 (int32x1_t __a, const int __b)
22306 return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
22309 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22310 vqrshrund_n_s64 (int64x1_t __a, const int __b)
22312 return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
22315 /* vqshl */
22317 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22318 vqshl_s8 (int8x8_t __a, int8x8_t __b)
22320 return __builtin_aarch64_sqshlv8qi (__a, __b);
22323 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22324 vqshl_s16 (int16x4_t __a, int16x4_t __b)
22326 return __builtin_aarch64_sqshlv4hi (__a, __b);
22329 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22330 vqshl_s32 (int32x2_t __a, int32x2_t __b)
22332 return __builtin_aarch64_sqshlv2si (__a, __b);
22335 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22336 vqshl_s64 (int64x1_t __a, int64x1_t __b)
22338 return __builtin_aarch64_sqshldi (__a, __b);
22341 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22342 vqshl_u8 (uint8x8_t __a, int8x8_t __b)
22344 return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b);
22347 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22348 vqshl_u16 (uint16x4_t __a, int16x4_t __b)
22350 return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b);
22353 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22354 vqshl_u32 (uint32x2_t __a, int32x2_t __b)
22356 return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b);
22359 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22360 vqshl_u64 (uint64x1_t __a, int64x1_t __b)
22362 return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b);
22365 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22366 vqshlq_s8 (int8x16_t __a, int8x16_t __b)
22368 return __builtin_aarch64_sqshlv16qi (__a, __b);
22371 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22372 vqshlq_s16 (int16x8_t __a, int16x8_t __b)
22374 return __builtin_aarch64_sqshlv8hi (__a, __b);
22377 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22378 vqshlq_s32 (int32x4_t __a, int32x4_t __b)
22380 return __builtin_aarch64_sqshlv4si (__a, __b);
22383 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22384 vqshlq_s64 (int64x2_t __a, int64x2_t __b)
22386 return __builtin_aarch64_sqshlv2di (__a, __b);
22389 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22390 vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
22392 return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b);
22395 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22396 vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
22398 return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b);
22401 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22402 vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
22404 return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b);
22407 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22408 vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
22410 return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b);
22413 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22414 vqshlb_s8 (int8x1_t __a, int8x1_t __b)
22416 return __builtin_aarch64_sqshlqi (__a, __b);
22419 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22420 vqshlh_s16 (int16x1_t __a, int16x1_t __b)
22422 return __builtin_aarch64_sqshlhi (__a, __b);
22425 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22426 vqshls_s32 (int32x1_t __a, int32x1_t __b)
22428 return __builtin_aarch64_sqshlsi (__a, __b);
22431 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22432 vqshld_s64 (int64x1_t __a, int64x1_t __b)
22434 return __builtin_aarch64_sqshldi (__a, __b);
22437 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22438 vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
22440 return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b);
22443 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22444 vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
22446 return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b);
22449 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22450 vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
22452 return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b);
22455 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22456 vqshld_u64 (uint64x1_t __a, uint64x1_t __b)
22458 return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b);
22461 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22462 vqshl_n_s8 (int8x8_t __a, const int __b)
22464 return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
22467 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22468 vqshl_n_s16 (int16x4_t __a, const int __b)
22470 return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
22473 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22474 vqshl_n_s32 (int32x2_t __a, const int __b)
22476 return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
22479 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22480 vqshl_n_s64 (int64x1_t __a, const int __b)
22482 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22485 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22486 vqshl_n_u8 (uint8x8_t __a, const int __b)
22488 return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b);
22491 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22492 vqshl_n_u16 (uint16x4_t __a, const int __b)
22494 return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b);
22497 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22498 vqshl_n_u32 (uint32x2_t __a, const int __b)
22500 return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b);
22503 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22504 vqshl_n_u64 (uint64x1_t __a, const int __b)
22506 return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b);
22509 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22510 vqshlq_n_s8 (int8x16_t __a, const int __b)
22512 return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
22515 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22516 vqshlq_n_s16 (int16x8_t __a, const int __b)
22518 return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
22521 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22522 vqshlq_n_s32 (int32x4_t __a, const int __b)
22524 return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
22527 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22528 vqshlq_n_s64 (int64x2_t __a, const int __b)
22530 return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
22533 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22534 vqshlq_n_u8 (uint8x16_t __a, const int __b)
22536 return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b);
22539 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22540 vqshlq_n_u16 (uint16x8_t __a, const int __b)
22542 return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b);
22545 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22546 vqshlq_n_u32 (uint32x4_t __a, const int __b)
22548 return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b);
22551 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22552 vqshlq_n_u64 (uint64x2_t __a, const int __b)
22554 return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b);
22557 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22558 vqshlb_n_s8 (int8x1_t __a, const int __b)
22560 return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
22563 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22564 vqshlh_n_s16 (int16x1_t __a, const int __b)
22566 return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
22569 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22570 vqshls_n_s32 (int32x1_t __a, const int __b)
22572 return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
22575 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22576 vqshld_n_s64 (int64x1_t __a, const int __b)
22578 return (int64x1_t) __builtin_aarch64_sqshl_ndi (__a, __b);
22581 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22582 vqshlb_n_u8 (uint8x1_t __a, const int __b)
22584 return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b);
22587 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22588 vqshlh_n_u16 (uint16x1_t __a, const int __b)
22590 return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b);
22593 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22594 vqshls_n_u32 (uint32x1_t __a, const int __b)
22596 return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b);
22599 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22600 vqshld_n_u64 (uint64x1_t __a, const int __b)
22602 return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b);
22605 /* vqshlu */
22607 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22608 vqshlu_n_s8 (int8x8_t __a, const int __b)
22610 return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b);
22613 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22614 vqshlu_n_s16 (int16x4_t __a, const int __b)
22616 return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b);
22619 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22620 vqshlu_n_s32 (int32x2_t __a, const int __b)
22622 return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b);
22625 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22626 vqshlu_n_s64 (int64x1_t __a, const int __b)
22628 return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22631 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22632 vqshluq_n_s8 (int8x16_t __a, const int __b)
22634 return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b);
22637 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22638 vqshluq_n_s16 (int16x8_t __a, const int __b)
22640 return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b);
22643 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22644 vqshluq_n_s32 (int32x4_t __a, const int __b)
22646 return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b);
22649 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22650 vqshluq_n_s64 (int64x2_t __a, const int __b)
22652 return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b);
22655 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22656 vqshlub_n_s8 (int8x1_t __a, const int __b)
22658 return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b);
22661 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22662 vqshluh_n_s16 (int16x1_t __a, const int __b)
22664 return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b);
22667 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22668 vqshlus_n_s32 (int32x1_t __a, const int __b)
22670 return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b);
22673 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22674 vqshlud_n_s64 (int64x1_t __a, const int __b)
22676 return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b);
22679 /* vqshrn */
22681 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22682 vqshrn_n_s16 (int16x8_t __a, const int __b)
22684 return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
22687 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22688 vqshrn_n_s32 (int32x4_t __a, const int __b)
22690 return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
22693 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22694 vqshrn_n_s64 (int64x2_t __a, const int __b)
22696 return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
22699 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22700 vqshrn_n_u16 (uint16x8_t __a, const int __b)
22702 return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b);
22705 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22706 vqshrn_n_u32 (uint32x4_t __a, const int __b)
22708 return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b);
22711 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22712 vqshrn_n_u64 (uint64x2_t __a, const int __b)
22714 return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b);
22717 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22718 vqshrnh_n_s16 (int16x1_t __a, const int __b)
22720 return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
22723 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22724 vqshrns_n_s32 (int32x1_t __a, const int __b)
22726 return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
22729 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22730 vqshrnd_n_s64 (int64x1_t __a, const int __b)
22732 return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
22735 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22736 vqshrnh_n_u16 (uint16x1_t __a, const int __b)
22738 return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b);
22741 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22742 vqshrns_n_u32 (uint32x1_t __a, const int __b)
22744 return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b);
22747 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22748 vqshrnd_n_u64 (uint64x1_t __a, const int __b)
22750 return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b);
22753 /* vqshrun */
22755 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22756 vqshrun_n_s16 (int16x8_t __a, const int __b)
22758 return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
22761 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22762 vqshrun_n_s32 (int32x4_t __a, const int __b)
22764 return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
22767 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22768 vqshrun_n_s64 (int64x2_t __a, const int __b)
22770 return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
22773 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22774 vqshrunh_n_s16 (int16x1_t __a, const int __b)
22776 return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
22779 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22780 vqshruns_n_s32 (int32x1_t __a, const int __b)
22782 return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
22785 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22786 vqshrund_n_s64 (int64x1_t __a, const int __b)
22788 return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
22791 /* vqsub */
22793 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
22794 vqsubb_s8 (int8x1_t __a, int8x1_t __b)
22796 return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
22799 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
22800 vqsubh_s16 (int16x1_t __a, int16x1_t __b)
22802 return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
22805 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
22806 vqsubs_s32 (int32x1_t __a, int32x1_t __b)
22808 return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
22811 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22812 vqsubd_s64 (int64x1_t __a, int64x1_t __b)
22814 return (int64x1_t) __builtin_aarch64_sqsubdi (__a, __b);
22817 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22818 vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
22820 return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b);
22823 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22824 vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
22826 return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b);
22829 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22830 vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
22832 return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b);
22835 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22836 vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
22838 return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
22841 /* vrecpe */
22843 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22844 vrecpes_f32 (float32_t __a)
22846 return __builtin_aarch64_frecpesf (__a);
22849 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22850 vrecped_f64 (float64_t __a)
22852 return __builtin_aarch64_frecpedf (__a);
22855 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22856 vrecpe_f32 (float32x2_t __a)
22858 return __builtin_aarch64_frecpev2sf (__a);
22861 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22862 vrecpeq_f32 (float32x4_t __a)
22864 return __builtin_aarch64_frecpev4sf (__a);
22867 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22868 vrecpeq_f64 (float64x2_t __a)
22870 return __builtin_aarch64_frecpev2df (__a);
22873 /* vrecps */
22875 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22876 vrecpss_f32 (float32_t __a, float32_t __b)
22878 return __builtin_aarch64_frecpssf (__a, __b);
22881 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22882 vrecpsd_f64 (float64_t __a, float64_t __b)
22884 return __builtin_aarch64_frecpsdf (__a, __b);
22887 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22888 vrecps_f32 (float32x2_t __a, float32x2_t __b)
22890 return __builtin_aarch64_frecpsv2sf (__a, __b);
22893 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22894 vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
22896 return __builtin_aarch64_frecpsv4sf (__a, __b);
22899 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22900 vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
22902 return __builtin_aarch64_frecpsv2df (__a, __b);
22905 /* vrecpx */
22907 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
22908 vrecpxs_f32 (float32_t __a)
22910 return __builtin_aarch64_frecpxsf (__a);
22913 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
22914 vrecpxd_f64 (float64_t __a)
22916 return __builtin_aarch64_frecpxdf (__a);
22919 /* vrnd */
22921 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22922 vrnd_f32 (float32x2_t __a)
22924 return __builtin_aarch64_btruncv2sf (__a);
22927 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22928 vrndq_f32 (float32x4_t __a)
22930 return __builtin_aarch64_btruncv4sf (__a);
22933 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22934 vrndq_f64 (float64x2_t __a)
22936 return __builtin_aarch64_btruncv2df (__a);
22939 /* vrnda */
22941 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22942 vrnda_f32 (float32x2_t __a)
22944 return __builtin_aarch64_roundv2sf (__a);
22947 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22948 vrndaq_f32 (float32x4_t __a)
22950 return __builtin_aarch64_roundv4sf (__a);
22953 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22954 vrndaq_f64 (float64x2_t __a)
22956 return __builtin_aarch64_roundv2df (__a);
22959 /* vrndi */
22961 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22962 vrndi_f32 (float32x2_t __a)
22964 return __builtin_aarch64_nearbyintv2sf (__a);
22967 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22968 vrndiq_f32 (float32x4_t __a)
22970 return __builtin_aarch64_nearbyintv4sf (__a);
22973 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22974 vrndiq_f64 (float64x2_t __a)
22976 return __builtin_aarch64_nearbyintv2df (__a);
22979 /* vrndm */
22981 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22982 vrndm_f32 (float32x2_t __a)
22984 return __builtin_aarch64_floorv2sf (__a);
22987 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22988 vrndmq_f32 (float32x4_t __a)
22990 return __builtin_aarch64_floorv4sf (__a);
22993 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22994 vrndmq_f64 (float64x2_t __a)
22996 return __builtin_aarch64_floorv2df (__a);
22999 /* vrndn */
23001 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23002 vrndn_f32 (float32x2_t __a)
23004 return __builtin_aarch64_frintnv2sf (__a);
23006 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23007 vrndnq_f32 (float32x4_t __a)
23009 return __builtin_aarch64_frintnv4sf (__a);
23012 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23013 vrndnq_f64 (float64x2_t __a)
23015 return __builtin_aarch64_frintnv2df (__a);
23018 /* vrndp */
23020 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23021 vrndp_f32 (float32x2_t __a)
23023 return __builtin_aarch64_ceilv2sf (__a);
23026 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23027 vrndpq_f32 (float32x4_t __a)
23029 return __builtin_aarch64_ceilv4sf (__a);
23032 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23033 vrndpq_f64 (float64x2_t __a)
23035 return __builtin_aarch64_ceilv2df (__a);
23038 /* vrndx */
23040 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23041 vrndx_f32 (float32x2_t __a)
23043 return __builtin_aarch64_rintv2sf (__a);
23046 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23047 vrndxq_f32 (float32x4_t __a)
23049 return __builtin_aarch64_rintv4sf (__a);
23052 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23053 vrndxq_f64 (float64x2_t __a)
23055 return __builtin_aarch64_rintv2df (__a);
23058 /* vrshl */
23060 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23061 vrshl_s8 (int8x8_t __a, int8x8_t __b)
23063 return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
23066 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23067 vrshl_s16 (int16x4_t __a, int16x4_t __b)
23069 return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
23072 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23073 vrshl_s32 (int32x2_t __a, int32x2_t __b)
23075 return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
23078 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23079 vrshl_s64 (int64x1_t __a, int64x1_t __b)
23081 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23084 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23085 vrshl_u8 (uint8x8_t __a, int8x8_t __b)
23087 return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b);
23090 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23091 vrshl_u16 (uint16x4_t __a, int16x4_t __b)
23093 return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b);
23096 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23097 vrshl_u32 (uint32x2_t __a, int32x2_t __b)
23099 return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b);
23102 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23103 vrshl_u64 (uint64x1_t __a, int64x1_t __b)
23105 return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b);
23108 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23109 vrshlq_s8 (int8x16_t __a, int8x16_t __b)
23111 return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
23114 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23115 vrshlq_s16 (int16x8_t __a, int16x8_t __b)
23117 return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
23120 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23121 vrshlq_s32 (int32x4_t __a, int32x4_t __b)
23123 return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
23126 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23127 vrshlq_s64 (int64x2_t __a, int64x2_t __b)
23129 return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
23132 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23133 vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
23135 return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b);
23138 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23139 vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
23141 return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b);
23144 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23145 vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
23147 return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b);
23150 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23151 vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
23153 return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b);
23156 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23157 vrshld_s64 (int64x1_t __a, int64x1_t __b)
23159 return (int64x1_t) __builtin_aarch64_srshldi (__a, __b);
23162 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23163 vrshld_u64 (uint64x1_t __a, uint64x1_t __b)
23165 return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b);
23168 /* vrshr */
23170 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23171 vrshr_n_s8 (int8x8_t __a, const int __b)
23173 return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
23176 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23177 vrshr_n_s16 (int16x4_t __a, const int __b)
23179 return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
23182 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23183 vrshr_n_s32 (int32x2_t __a, const int __b)
23185 return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
23188 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23189 vrshr_n_s64 (int64x1_t __a, const int __b)
23191 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23194 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23195 vrshr_n_u8 (uint8x8_t __a, const int __b)
23197 return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b);
23200 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23201 vrshr_n_u16 (uint16x4_t __a, const int __b)
23203 return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b);
23206 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23207 vrshr_n_u32 (uint32x2_t __a, const int __b)
23209 return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b);
23212 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23213 vrshr_n_u64 (uint64x1_t __a, const int __b)
23215 return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b);
23218 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23219 vrshrq_n_s8 (int8x16_t __a, const int __b)
23221 return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
23224 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23225 vrshrq_n_s16 (int16x8_t __a, const int __b)
23227 return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
23230 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23231 vrshrq_n_s32 (int32x4_t __a, const int __b)
23233 return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
23236 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23237 vrshrq_n_s64 (int64x2_t __a, const int __b)
23239 return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
23242 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23243 vrshrq_n_u8 (uint8x16_t __a, const int __b)
23245 return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b);
23248 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23249 vrshrq_n_u16 (uint16x8_t __a, const int __b)
23251 return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b);
23254 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23255 vrshrq_n_u32 (uint32x4_t __a, const int __b)
23257 return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b);
23260 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23261 vrshrq_n_u64 (uint64x2_t __a, const int __b)
23263 return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b);
23266 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23267 vrshrd_n_s64 (int64x1_t __a, const int __b)
23269 return (int64x1_t) __builtin_aarch64_srshr_ndi (__a, __b);
23272 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23273 vrshrd_n_u64 (uint64x1_t __a, const int __b)
23275 return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b);
23278 /* vrsra */
23280 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23281 vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23283 return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
23286 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23287 vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23289 return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
23292 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23293 vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23295 return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
23298 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23299 vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23301 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23304 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23305 vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23307 return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a,
23308 (int8x8_t) __b, __c);
23311 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23312 vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23314 return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a,
23315 (int16x4_t) __b, __c);
23318 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23319 vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23321 return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a,
23322 (int32x2_t) __b, __c);
23325 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23326 vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23328 return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a,
23329 (int64x1_t) __b, __c);
23332 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23333 vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23335 return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
23338 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23339 vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23341 return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
23344 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23345 vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23347 return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
23350 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23351 vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23353 return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
23356 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23357 vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23359 return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a,
23360 (int8x16_t) __b, __c);
23363 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23364 vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23366 return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a,
23367 (int16x8_t) __b, __c);
23370 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23371 vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23373 return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a,
23374 (int32x4_t) __b, __c);
23377 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23378 vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23380 return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a,
23381 (int64x2_t) __b, __c);
23384 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23385 vrsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23387 return (int64x1_t) __builtin_aarch64_srsra_ndi (__a, __b, __c);
23390 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23391 vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23393 return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c);
23396 /* vshl */
23398 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23399 vshl_n_s8 (int8x8_t __a, const int __b)
23401 return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
23404 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23405 vshl_n_s16 (int16x4_t __a, const int __b)
23407 return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
23410 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23411 vshl_n_s32 (int32x2_t __a, const int __b)
23413 return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
23416 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23417 vshl_n_s64 (int64x1_t __a, const int __b)
23419 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23422 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23423 vshl_n_u8 (uint8x8_t __a, const int __b)
23425 return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
23428 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23429 vshl_n_u16 (uint16x4_t __a, const int __b)
23431 return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
23434 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23435 vshl_n_u32 (uint32x2_t __a, const int __b)
23437 return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
23440 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23441 vshl_n_u64 (uint64x1_t __a, const int __b)
23443 return (uint64x1_t) __builtin_aarch64_ashldi ((int64x1_t) __a, __b);
23446 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23447 vshlq_n_s8 (int8x16_t __a, const int __b)
23449 return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
23452 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23453 vshlq_n_s16 (int16x8_t __a, const int __b)
23455 return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
23458 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23459 vshlq_n_s32 (int32x4_t __a, const int __b)
23461 return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
23464 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23465 vshlq_n_s64 (int64x2_t __a, const int __b)
23467 return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
23470 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23471 vshlq_n_u8 (uint8x16_t __a, const int __b)
23473 return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
23476 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23477 vshlq_n_u16 (uint16x8_t __a, const int __b)
23479 return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
23482 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23483 vshlq_n_u32 (uint32x4_t __a, const int __b)
23485 return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
23488 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23489 vshlq_n_u64 (uint64x2_t __a, const int __b)
23491 return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
23494 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23495 vshld_n_s64 (int64x1_t __a, const int __b)
23497 return (int64x1_t) __builtin_aarch64_ashldi (__a, __b);
23500 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23501 vshld_n_u64 (uint64x1_t __a, const int __b)
23503 return (uint64x1_t) __builtin_aarch64_ashldi (__a, __b);
23506 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23507 vshl_s8 (int8x8_t __a, int8x8_t __b)
23509 return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b);
23512 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23513 vshl_s16 (int16x4_t __a, int16x4_t __b)
23515 return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b);
23518 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23519 vshl_s32 (int32x2_t __a, int32x2_t __b)
23521 return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b);
23524 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23525 vshl_s64 (int64x1_t __a, int64x1_t __b)
23527 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23530 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23531 vshl_u8 (uint8x8_t __a, int8x8_t __b)
23533 return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b);
23536 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23537 vshl_u16 (uint16x4_t __a, int16x4_t __b)
23539 return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b);
23542 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23543 vshl_u32 (uint32x2_t __a, int32x2_t __b)
23545 return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b);
23548 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23549 vshl_u64 (uint64x1_t __a, int64x1_t __b)
23551 return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b);
23554 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23555 vshlq_s8 (int8x16_t __a, int8x16_t __b)
23557 return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b);
23560 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23561 vshlq_s16 (int16x8_t __a, int16x8_t __b)
23563 return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b);
23566 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23567 vshlq_s32 (int32x4_t __a, int32x4_t __b)
23569 return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b);
23572 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23573 vshlq_s64 (int64x2_t __a, int64x2_t __b)
23575 return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b);
23578 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23579 vshlq_u8 (uint8x16_t __a, int8x16_t __b)
23581 return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b);
23584 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23585 vshlq_u16 (uint16x8_t __a, int16x8_t __b)
23587 return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b);
23590 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23591 vshlq_u32 (uint32x4_t __a, int32x4_t __b)
23593 return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b);
23596 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23597 vshlq_u64 (uint64x2_t __a, int64x2_t __b)
23599 return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b);
23602 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23603 vshld_s64 (int64x1_t __a, int64x1_t __b)
23605 return (int64x1_t) __builtin_aarch64_sshldi (__a, __b);
23608 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23609 vshld_u64 (uint64x1_t __a, uint64x1_t __b)
23611 return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b);
23614 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23615 vshll_high_n_s8 (int8x16_t __a, const int __b)
23617 return __builtin_aarch64_sshll2_nv16qi (__a, __b);
23620 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23621 vshll_high_n_s16 (int16x8_t __a, const int __b)
23623 return __builtin_aarch64_sshll2_nv8hi (__a, __b);
23626 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23627 vshll_high_n_s32 (int32x4_t __a, const int __b)
23629 return __builtin_aarch64_sshll2_nv4si (__a, __b);
23632 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23633 vshll_high_n_u8 (uint8x16_t __a, const int __b)
23635 return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
23638 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23639 vshll_high_n_u16 (uint16x8_t __a, const int __b)
23641 return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
23644 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23645 vshll_high_n_u32 (uint32x4_t __a, const int __b)
23647 return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
23650 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23651 vshll_n_s8 (int8x8_t __a, const int __b)
23653 return __builtin_aarch64_sshll_nv8qi (__a, __b);
23656 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23657 vshll_n_s16 (int16x4_t __a, const int __b)
23659 return __builtin_aarch64_sshll_nv4hi (__a, __b);
23662 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23663 vshll_n_s32 (int32x2_t __a, const int __b)
23665 return __builtin_aarch64_sshll_nv2si (__a, __b);
23668 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23669 vshll_n_u8 (uint8x8_t __a, const int __b)
23671 return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b);
23674 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23675 vshll_n_u16 (uint16x4_t __a, const int __b)
23677 return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b);
23680 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23681 vshll_n_u32 (uint32x2_t __a, const int __b)
23683 return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b);
23686 /* vshr */
23688 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23689 vshr_n_s8 (int8x8_t __a, const int __b)
23691 return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
23694 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23695 vshr_n_s16 (int16x4_t __a, const int __b)
23697 return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
23700 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23701 vshr_n_s32 (int32x2_t __a, const int __b)
23703 return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
23706 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23707 vshr_n_s64 (int64x1_t __a, const int __b)
23709 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23712 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23713 vshr_n_u8 (uint8x8_t __a, const int __b)
23715 return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
23718 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23719 vshr_n_u16 (uint16x4_t __a, const int __b)
23721 return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
23724 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23725 vshr_n_u32 (uint32x2_t __a, const int __b)
23727 return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
23730 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23731 vshr_n_u64 (uint64x1_t __a, const int __b)
23733 return (uint64x1_t) __builtin_aarch64_lshrdi ((int64x1_t) __a, __b);
23736 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23737 vshrq_n_s8 (int8x16_t __a, const int __b)
23739 return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
23742 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23743 vshrq_n_s16 (int16x8_t __a, const int __b)
23745 return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
23748 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23749 vshrq_n_s32 (int32x4_t __a, const int __b)
23751 return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
23754 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23755 vshrq_n_s64 (int64x2_t __a, const int __b)
23757 return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
23760 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23761 vshrq_n_u8 (uint8x16_t __a, const int __b)
23763 return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
23766 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23767 vshrq_n_u16 (uint16x8_t __a, const int __b)
23769 return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
23772 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23773 vshrq_n_u32 (uint32x4_t __a, const int __b)
23775 return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
23778 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23779 vshrq_n_u64 (uint64x2_t __a, const int __b)
23781 return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
23784 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23785 vshrd_n_s64 (int64x1_t __a, const int __b)
23787 return (int64x1_t) __builtin_aarch64_ashrdi (__a, __b);
23790 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23791 vshrd_n_u64 (uint64x1_t __a, const int __b)
23793 return (uint64x1_t) __builtin_aarch64_lshrdi (__a, __b);
23796 /* vsli */
23798 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23799 vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
23801 return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
23804 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23805 vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
23807 return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
23810 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23811 vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
23813 return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
23816 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23817 vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23819 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23822 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23823 vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
23825 return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a,
23826 (int8x8_t) __b, __c);
23829 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23830 vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
23832 return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a,
23833 (int16x4_t) __b, __c);
23836 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23837 vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
23839 return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a,
23840 (int32x2_t) __b, __c);
23843 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23844 vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23846 return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a,
23847 (int64x1_t) __b, __c);
23850 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23851 vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
23853 return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
23856 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23857 vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
23859 return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
23862 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23863 vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
23865 return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
23868 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23869 vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
23871 return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
23874 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23875 vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
23877 return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a,
23878 (int8x16_t) __b, __c);
23881 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23882 vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
23884 return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a,
23885 (int16x8_t) __b, __c);
23888 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23889 vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
23891 return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a,
23892 (int32x4_t) __b, __c);
23895 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23896 vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
23898 return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a,
23899 (int64x2_t) __b, __c);
23902 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
23903 vslid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
23905 return (int64x1_t) __builtin_aarch64_ssli_ndi (__a, __b, __c);
23908 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23909 vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
23911 return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c);
23914 /* vsqadd */
23916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23917 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
23919 return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a,
23920 (int8x8_t) __b);
23923 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23924 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
23926 return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a,
23927 (int16x4_t) __b);
23930 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23931 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
23933 return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a,
23934 (int32x2_t) __b);
23937 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23938 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
23940 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23943 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23944 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
23946 return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a,
23947 (int8x16_t) __b);
23950 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23951 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
23953 return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a,
23954 (int16x8_t) __b);
23957 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23958 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
23960 return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a,
23961 (int32x4_t) __b);
23964 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23965 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
23967 return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a,
23968 (int64x2_t) __b);
23971 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
23972 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
23974 return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b);
23977 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
23978 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
23980 return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b);
23983 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
23984 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
23986 return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b);
23989 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
23990 vsqaddd_u64 (uint64x1_t __a, int64x1_t __b)
23992 return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b);
23995 /* vsqrt */
23996 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23997 vsqrt_f32 (float32x2_t a)
23999 return __builtin_aarch64_sqrtv2sf (a);
24002 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24003 vsqrtq_f32 (float32x4_t a)
24005 return __builtin_aarch64_sqrtv4sf (a);
24008 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24009 vsqrtq_f64 (float64x2_t a)
24011 return __builtin_aarch64_sqrtv2df (a);
24014 /* vsra */
24016 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24017 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
24019 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
24022 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24023 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
24025 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
24028 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24029 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
24031 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
24034 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24035 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24037 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
24040 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24041 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
24043 return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a,
24044 (int8x8_t) __b, __c);
24047 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24048 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24050 return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a,
24051 (int16x4_t) __b, __c);
24054 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24055 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24057 return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a,
24058 (int32x2_t) __b, __c);
24061 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24062 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24064 return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a,
24065 (int64x1_t) __b, __c);
24068 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24069 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24071 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
24074 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24075 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24077 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
24080 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24081 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24083 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
24086 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24087 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24089 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
24092 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24093 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24095 return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a,
24096 (int8x16_t) __b, __c);
24099 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24100 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24102 return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a,
24103 (int16x8_t) __b, __c);
24106 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24107 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24109 return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a,
24110 (int32x4_t) __b, __c);
24113 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24114 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24116 return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a,
24117 (int64x2_t) __b, __c);
24120 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24121 vsrad_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24123 return (int64x1_t) __builtin_aarch64_ssra_ndi (__a, __b, __c);
24126 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24127 vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24129 return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c);
24132 /* vsri */
24134 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24135 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
24137 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
24140 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24141 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
24143 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
24146 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24147 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
24149 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
24152 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24153 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24155 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24158 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24159 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
24161 return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a,
24162 (int8x8_t) __b, __c);
24165 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24166 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
24168 return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a,
24169 (int16x4_t) __b, __c);
24172 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24173 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
24175 return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a,
24176 (int32x2_t) __b, __c);
24179 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24180 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24182 return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a,
24183 (int64x1_t) __b, __c);
24186 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24187 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
24189 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
24192 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24193 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
24195 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
24198 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24199 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
24201 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
24204 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24205 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
24207 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
24210 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24211 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
24213 return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a,
24214 (int8x16_t) __b, __c);
24217 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24218 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
24220 return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a,
24221 (int16x8_t) __b, __c);
24224 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24225 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
24227 return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a,
24228 (int32x4_t) __b, __c);
24231 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24232 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
24234 return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a,
24235 (int64x2_t) __b, __c);
24238 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
24239 vsrid_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
24241 return (int64x1_t) __builtin_aarch64_ssri_ndi (__a, __b, __c);
24244 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
24245 vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
24247 return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c);
24250 /* vst1 */
24252 __extension__ static __inline void __attribute__ ((__always_inline__))
24253 vst1_f32 (float32_t *a, float32x2_t b)
24255 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
24258 __extension__ static __inline void __attribute__ ((__always_inline__))
24259 vst1_f64 (float64_t *a, float64x1_t b)
24261 *a = b;
24264 __extension__ static __inline void __attribute__ ((__always_inline__))
24265 vst1_p8 (poly8_t *a, poly8x8_t b)
24267 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24268 (int8x8_t) b);
24271 __extension__ static __inline void __attribute__ ((__always_inline__))
24272 vst1_p16 (poly16_t *a, poly16x4_t b)
24274 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24275 (int16x4_t) b);
24278 __extension__ static __inline void __attribute__ ((__always_inline__))
24279 vst1_s8 (int8_t *a, int8x8_t b)
24281 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
24284 __extension__ static __inline void __attribute__ ((__always_inline__))
24285 vst1_s16 (int16_t *a, int16x4_t b)
24287 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
24290 __extension__ static __inline void __attribute__ ((__always_inline__))
24291 vst1_s32 (int32_t *a, int32x2_t b)
24293 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
24296 __extension__ static __inline void __attribute__ ((__always_inline__))
24297 vst1_s64 (int64_t *a, int64x1_t b)
24299 *a = b;
24302 __extension__ static __inline void __attribute__ ((__always_inline__))
24303 vst1_u8 (uint8_t *a, uint8x8_t b)
24305 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
24306 (int8x8_t) b);
24309 __extension__ static __inline void __attribute__ ((__always_inline__))
24310 vst1_u16 (uint16_t *a, uint16x4_t b)
24312 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
24313 (int16x4_t) b);
24316 __extension__ static __inline void __attribute__ ((__always_inline__))
24317 vst1_u32 (uint32_t *a, uint32x2_t b)
24319 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
24320 (int32x2_t) b);
24323 __extension__ static __inline void __attribute__ ((__always_inline__))
24324 vst1_u64 (uint64_t *a, uint64x1_t b)
24326 *a = b;
24329 __extension__ static __inline void __attribute__ ((__always_inline__))
24330 vst1q_f32 (float32_t *a, float32x4_t b)
24332 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
24335 __extension__ static __inline void __attribute__ ((__always_inline__))
24336 vst1q_f64 (float64_t *a, float64x2_t b)
24338 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
24341 /* vst1q */
24343 __extension__ static __inline void __attribute__ ((__always_inline__))
24344 vst1q_p8 (poly8_t *a, poly8x16_t b)
24346 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24347 (int8x16_t) b);
24350 __extension__ static __inline void __attribute__ ((__always_inline__))
24351 vst1q_p16 (poly16_t *a, poly16x8_t b)
24353 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24354 (int16x8_t) b);
24357 __extension__ static __inline void __attribute__ ((__always_inline__))
24358 vst1q_s8 (int8_t *a, int8x16_t b)
24360 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
24363 __extension__ static __inline void __attribute__ ((__always_inline__))
24364 vst1q_s16 (int16_t *a, int16x8_t b)
24366 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
24369 __extension__ static __inline void __attribute__ ((__always_inline__))
24370 vst1q_s32 (int32_t *a, int32x4_t b)
24372 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
24375 __extension__ static __inline void __attribute__ ((__always_inline__))
24376 vst1q_s64 (int64_t *a, int64x2_t b)
24378 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
24381 __extension__ static __inline void __attribute__ ((__always_inline__))
24382 vst1q_u8 (uint8_t *a, uint8x16_t b)
24384 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
24385 (int8x16_t) b);
24388 __extension__ static __inline void __attribute__ ((__always_inline__))
24389 vst1q_u16 (uint16_t *a, uint16x8_t b)
24391 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
24392 (int16x8_t) b);
24395 __extension__ static __inline void __attribute__ ((__always_inline__))
24396 vst1q_u32 (uint32_t *a, uint32x4_t b)
24398 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
24399 (int32x4_t) b);
24402 __extension__ static __inline void __attribute__ ((__always_inline__))
24403 vst1q_u64 (uint64_t *a, uint64x2_t b)
24405 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
24406 (int64x2_t) b);
24409 /* vstn */
24411 __extension__ static __inline void
24412 vst2_s64 (int64_t * __a, int64x1x2_t val)
24414 __builtin_aarch64_simd_oi __o;
24415 int64x2x2_t temp;
24416 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24417 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24418 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24419 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24420 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24423 __extension__ static __inline void
24424 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
24426 __builtin_aarch64_simd_oi __o;
24427 uint64x2x2_t temp;
24428 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24429 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24430 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
24431 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
24432 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
24435 __extension__ static __inline void
24436 vst2_f64 (float64_t * __a, float64x1x2_t val)
24438 __builtin_aarch64_simd_oi __o;
24439 float64x2x2_t temp;
24440 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24441 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24442 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
24443 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
24444 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
24447 __extension__ static __inline void
24448 vst2_s8 (int8_t * __a, int8x8x2_t val)
24450 __builtin_aarch64_simd_oi __o;
24451 int8x16x2_t temp;
24452 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24453 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24454 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24455 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24456 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24459 __extension__ static __inline void __attribute__ ((__always_inline__))
24460 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
24462 __builtin_aarch64_simd_oi __o;
24463 poly8x16x2_t temp;
24464 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24465 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24466 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24467 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24468 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24471 __extension__ static __inline void __attribute__ ((__always_inline__))
24472 vst2_s16 (int16_t * __a, int16x4x2_t val)
24474 __builtin_aarch64_simd_oi __o;
24475 int16x8x2_t temp;
24476 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24477 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24478 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24479 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24480 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24483 __extension__ static __inline void __attribute__ ((__always_inline__))
24484 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
24486 __builtin_aarch64_simd_oi __o;
24487 poly16x8x2_t temp;
24488 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24489 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24490 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24491 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24492 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24495 __extension__ static __inline void __attribute__ ((__always_inline__))
24496 vst2_s32 (int32_t * __a, int32x2x2_t val)
24498 __builtin_aarch64_simd_oi __o;
24499 int32x4x2_t temp;
24500 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24501 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24502 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24503 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24504 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24507 __extension__ static __inline void __attribute__ ((__always_inline__))
24508 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
24510 __builtin_aarch64_simd_oi __o;
24511 uint8x16x2_t temp;
24512 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24513 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24514 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
24515 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
24516 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24519 __extension__ static __inline void __attribute__ ((__always_inline__))
24520 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
24522 __builtin_aarch64_simd_oi __o;
24523 uint16x8x2_t temp;
24524 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24525 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24526 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
24527 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
24528 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24531 __extension__ static __inline void __attribute__ ((__always_inline__))
24532 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
24534 __builtin_aarch64_simd_oi __o;
24535 uint32x4x2_t temp;
24536 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24537 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24538 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
24539 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
24540 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
24543 __extension__ static __inline void __attribute__ ((__always_inline__))
24544 vst2_f32 (float32_t * __a, float32x2x2_t val)
24546 __builtin_aarch64_simd_oi __o;
24547 float32x4x2_t temp;
24548 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24549 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24550 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
24551 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
24552 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24555 __extension__ static __inline void __attribute__ ((__always_inline__))
24556 vst2q_s8 (int8_t * __a, int8x16x2_t val)
24558 __builtin_aarch64_simd_oi __o;
24559 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24560 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24561 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24564 __extension__ static __inline void __attribute__ ((__always_inline__))
24565 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
24567 __builtin_aarch64_simd_oi __o;
24568 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24569 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24570 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24573 __extension__ static __inline void __attribute__ ((__always_inline__))
24574 vst2q_s16 (int16_t * __a, int16x8x2_t val)
24576 __builtin_aarch64_simd_oi __o;
24577 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24578 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24579 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24582 __extension__ static __inline void __attribute__ ((__always_inline__))
24583 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
24585 __builtin_aarch64_simd_oi __o;
24586 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24587 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24588 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24591 __extension__ static __inline void __attribute__ ((__always_inline__))
24592 vst2q_s32 (int32_t * __a, int32x4x2_t val)
24594 __builtin_aarch64_simd_oi __o;
24595 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24596 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24597 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24600 __extension__ static __inline void __attribute__ ((__always_inline__))
24601 vst2q_s64 (int64_t * __a, int64x2x2_t val)
24603 __builtin_aarch64_simd_oi __o;
24604 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24605 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24606 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24609 __extension__ static __inline void __attribute__ ((__always_inline__))
24610 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
24612 __builtin_aarch64_simd_oi __o;
24613 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
24614 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
24615 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24618 __extension__ static __inline void __attribute__ ((__always_inline__))
24619 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
24621 __builtin_aarch64_simd_oi __o;
24622 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
24623 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
24624 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24627 __extension__ static __inline void __attribute__ ((__always_inline__))
24628 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
24630 __builtin_aarch64_simd_oi __o;
24631 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
24632 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
24633 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
24636 __extension__ static __inline void __attribute__ ((__always_inline__))
24637 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
24639 __builtin_aarch64_simd_oi __o;
24640 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
24641 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
24642 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
24645 __extension__ static __inline void __attribute__ ((__always_inline__))
24646 vst2q_f32 (float32_t * __a, float32x4x2_t val)
24648 __builtin_aarch64_simd_oi __o;
24649 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
24650 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
24651 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24654 __extension__ static __inline void __attribute__ ((__always_inline__))
24655 vst2q_f64 (float64_t * __a, float64x2x2_t val)
24657 __builtin_aarch64_simd_oi __o;
24658 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
24659 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
24660 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
24663 __extension__ static __inline void
24664 vst3_s64 (int64_t * __a, int64x1x3_t val)
24666 __builtin_aarch64_simd_ci __o;
24667 int64x2x3_t temp;
24668 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24669 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24670 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24671 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24672 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24673 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24674 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24677 __extension__ static __inline void
24678 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
24680 __builtin_aarch64_simd_ci __o;
24681 uint64x2x3_t temp;
24682 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24683 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24684 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24685 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
24686 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
24687 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
24688 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
24691 __extension__ static __inline void
24692 vst3_f64 (float64_t * __a, float64x1x3_t val)
24694 __builtin_aarch64_simd_ci __o;
24695 float64x2x3_t temp;
24696 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24697 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24698 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24699 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
24700 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
24701 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
24702 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
24705 __extension__ static __inline void
24706 vst3_s8 (int8_t * __a, int8x8x3_t val)
24708 __builtin_aarch64_simd_ci __o;
24709 int8x16x3_t temp;
24710 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
24711 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
24712 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
24713 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24714 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24715 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24716 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24719 __extension__ static __inline void __attribute__ ((__always_inline__))
24720 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
24722 __builtin_aarch64_simd_ci __o;
24723 poly8x16x3_t temp;
24724 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
24725 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
24726 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
24727 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24728 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24729 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24730 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24733 __extension__ static __inline void __attribute__ ((__always_inline__))
24734 vst3_s16 (int16_t * __a, int16x4x3_t val)
24736 __builtin_aarch64_simd_ci __o;
24737 int16x8x3_t temp;
24738 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
24739 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
24740 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
24741 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24742 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24743 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24744 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24747 __extension__ static __inline void __attribute__ ((__always_inline__))
24748 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
24750 __builtin_aarch64_simd_ci __o;
24751 poly16x8x3_t temp;
24752 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
24753 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
24754 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
24755 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24756 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24757 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24758 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24761 __extension__ static __inline void __attribute__ ((__always_inline__))
24762 vst3_s32 (int32_t * __a, int32x2x3_t val)
24764 __builtin_aarch64_simd_ci __o;
24765 int32x4x3_t temp;
24766 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
24767 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
24768 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
24769 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24770 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24771 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24772 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24775 __extension__ static __inline void __attribute__ ((__always_inline__))
24776 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
24778 __builtin_aarch64_simd_ci __o;
24779 uint8x16x3_t temp;
24780 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
24781 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
24782 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
24783 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
24784 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
24785 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
24786 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
24789 __extension__ static __inline void __attribute__ ((__always_inline__))
24790 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
24792 __builtin_aarch64_simd_ci __o;
24793 uint16x8x3_t temp;
24794 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
24795 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
24796 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
24797 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
24798 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
24799 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
24800 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
24803 __extension__ static __inline void __attribute__ ((__always_inline__))
24804 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
24806 __builtin_aarch64_simd_ci __o;
24807 uint32x4x3_t temp;
24808 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
24809 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
24810 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
24811 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
24812 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
24813 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
24814 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
24817 __extension__ static __inline void __attribute__ ((__always_inline__))
24818 vst3_f32 (float32_t * __a, float32x2x3_t val)
24820 __builtin_aarch64_simd_ci __o;
24821 float32x4x3_t temp;
24822 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
24823 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
24824 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
24825 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
24826 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
24827 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
24828 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
24831 __extension__ static __inline void __attribute__ ((__always_inline__))
24832 vst3q_s8 (int8_t * __a, int8x16x3_t val)
24834 __builtin_aarch64_simd_ci __o;
24835 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24836 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24837 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24838 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24841 __extension__ static __inline void __attribute__ ((__always_inline__))
24842 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
24844 __builtin_aarch64_simd_ci __o;
24845 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24846 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24847 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24848 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24851 __extension__ static __inline void __attribute__ ((__always_inline__))
24852 vst3q_s16 (int16_t * __a, int16x8x3_t val)
24854 __builtin_aarch64_simd_ci __o;
24855 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24856 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24857 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24858 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24861 __extension__ static __inline void __attribute__ ((__always_inline__))
24862 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
24864 __builtin_aarch64_simd_ci __o;
24865 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24866 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24867 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24868 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24871 __extension__ static __inline void __attribute__ ((__always_inline__))
24872 vst3q_s32 (int32_t * __a, int32x4x3_t val)
24874 __builtin_aarch64_simd_ci __o;
24875 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24876 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24877 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24878 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24881 __extension__ static __inline void __attribute__ ((__always_inline__))
24882 vst3q_s64 (int64_t * __a, int64x2x3_t val)
24884 __builtin_aarch64_simd_ci __o;
24885 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24886 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24887 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24888 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24891 __extension__ static __inline void __attribute__ ((__always_inline__))
24892 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
24894 __builtin_aarch64_simd_ci __o;
24895 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
24896 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
24897 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
24898 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
24901 __extension__ static __inline void __attribute__ ((__always_inline__))
24902 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
24904 __builtin_aarch64_simd_ci __o;
24905 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
24906 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
24907 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
24908 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
24911 __extension__ static __inline void __attribute__ ((__always_inline__))
24912 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
24914 __builtin_aarch64_simd_ci __o;
24915 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
24916 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
24917 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
24918 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
24921 __extension__ static __inline void __attribute__ ((__always_inline__))
24922 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
24924 __builtin_aarch64_simd_ci __o;
24925 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
24926 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
24927 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
24928 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
24931 __extension__ static __inline void __attribute__ ((__always_inline__))
24932 vst3q_f32 (float32_t * __a, float32x4x3_t val)
24934 __builtin_aarch64_simd_ci __o;
24935 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
24936 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
24937 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
24938 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
24941 __extension__ static __inline void __attribute__ ((__always_inline__))
24942 vst3q_f64 (float64_t * __a, float64x2x3_t val)
24944 __builtin_aarch64_simd_ci __o;
24945 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
24946 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
24947 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
24948 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
24951 __extension__ static __inline void
24952 vst4_s64 (int64_t * __a, int64x1x4_t val)
24954 __builtin_aarch64_simd_xi __o;
24955 int64x2x4_t temp;
24956 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (INT64_C (0)));
24957 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (INT64_C (0)));
24958 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (INT64_C (0)));
24959 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (INT64_C (0)));
24960 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24961 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24962 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24963 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24964 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24967 __extension__ static __inline void
24968 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
24970 __builtin_aarch64_simd_xi __o;
24971 uint64x2x4_t temp;
24972 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (UINT64_C (0)));
24973 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (UINT64_C (0)));
24974 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (UINT64_C (0)));
24975 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (UINT64_C (0)));
24976 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
24977 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
24978 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
24979 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
24980 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
24983 __extension__ static __inline void
24984 vst4_f64 (float64_t * __a, float64x1x4_t val)
24986 __builtin_aarch64_simd_xi __o;
24987 float64x2x4_t temp;
24988 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (UINT64_C (0)));
24989 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (UINT64_C (0)));
24990 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (UINT64_C (0)));
24991 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (UINT64_C (0)));
24992 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
24993 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
24994 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
24995 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
24996 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
24999 __extension__ static __inline void
25000 vst4_s8 (int8_t * __a, int8x8x4_t val)
25002 __builtin_aarch64_simd_xi __o;
25003 int8x16x4_t temp;
25004 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (INT64_C (0)));
25005 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (INT64_C (0)));
25006 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (INT64_C (0)));
25007 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (INT64_C (0)));
25008 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
25009 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
25010 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
25011 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
25012 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
25015 __extension__ static __inline void __attribute__ ((__always_inline__))
25016 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
25018 __builtin_aarch64_simd_xi __o;
25019 poly8x16x4_t temp;
25020 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (UINT64_C (0)));
25021 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (UINT64_C (0)));
25022 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (UINT64_C (0)));
25023 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (UINT64_C (0)));
25024 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
25025 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
25026 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
25027 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
25028 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
25031 __extension__ static __inline void __attribute__ ((__always_inline__))
25032 vst4_s16 (int16_t * __a, int16x4x4_t val)
25034 __builtin_aarch64_simd_xi __o;
25035 int16x8x4_t temp;
25036 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (INT64_C (0)));
25037 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (INT64_C (0)));
25038 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (INT64_C (0)));
25039 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (INT64_C (0)));
25040 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
25041 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
25042 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
25043 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
25044 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
25047 __extension__ static __inline void __attribute__ ((__always_inline__))
25048 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
25050 __builtin_aarch64_simd_xi __o;
25051 poly16x8x4_t temp;
25052 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (UINT64_C (0)));
25053 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (UINT64_C (0)));
25054 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (UINT64_C (0)));
25055 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (UINT64_C (0)));
25056 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
25057 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
25058 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
25059 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
25060 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
25063 __extension__ static __inline void __attribute__ ((__always_inline__))
25064 vst4_s32 (int32_t * __a, int32x2x4_t val)
25066 __builtin_aarch64_simd_xi __o;
25067 int32x4x4_t temp;
25068 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (INT64_C (0)));
25069 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (INT64_C (0)));
25070 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (INT64_C (0)));
25071 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (INT64_C (0)));
25072 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
25073 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
25074 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
25075 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
25076 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
25079 __extension__ static __inline void __attribute__ ((__always_inline__))
25080 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
25082 __builtin_aarch64_simd_xi __o;
25083 uint8x16x4_t temp;
25084 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (UINT64_C (0)));
25085 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (UINT64_C (0)));
25086 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (UINT64_C (0)));
25087 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (UINT64_C (0)));
25088 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
25089 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
25090 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
25091 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
25092 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
25095 __extension__ static __inline void __attribute__ ((__always_inline__))
25096 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
25098 __builtin_aarch64_simd_xi __o;
25099 uint16x8x4_t temp;
25100 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (UINT64_C (0)));
25101 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (UINT64_C (0)));
25102 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (UINT64_C (0)));
25103 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (UINT64_C (0)));
25104 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
25105 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
25106 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
25107 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
25108 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
25111 __extension__ static __inline void __attribute__ ((__always_inline__))
25112 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
25114 __builtin_aarch64_simd_xi __o;
25115 uint32x4x4_t temp;
25116 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (UINT64_C (0)));
25117 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (UINT64_C (0)));
25118 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (UINT64_C (0)));
25119 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (UINT64_C (0)));
25120 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
25121 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
25122 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
25123 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
25124 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
25127 __extension__ static __inline void __attribute__ ((__always_inline__))
25128 vst4_f32 (float32_t * __a, float32x2x4_t val)
25130 __builtin_aarch64_simd_xi __o;
25131 float32x4x4_t temp;
25132 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (UINT64_C (0)));
25133 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (UINT64_C (0)));
25134 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (UINT64_C (0)));
25135 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (UINT64_C (0)));
25136 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
25137 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
25138 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
25139 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
25140 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
25143 __extension__ static __inline void __attribute__ ((__always_inline__))
25144 vst4q_s8 (int8_t * __a, int8x16x4_t val)
25146 __builtin_aarch64_simd_xi __o;
25147 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25148 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25149 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25150 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25151 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25154 __extension__ static __inline void __attribute__ ((__always_inline__))
25155 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
25157 __builtin_aarch64_simd_xi __o;
25158 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25159 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25160 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25161 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25162 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25165 __extension__ static __inline void __attribute__ ((__always_inline__))
25166 vst4q_s16 (int16_t * __a, int16x8x4_t val)
25168 __builtin_aarch64_simd_xi __o;
25169 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25170 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25171 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25172 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25173 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25176 __extension__ static __inline void __attribute__ ((__always_inline__))
25177 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
25179 __builtin_aarch64_simd_xi __o;
25180 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25181 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25182 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25183 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25184 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25187 __extension__ static __inline void __attribute__ ((__always_inline__))
25188 vst4q_s32 (int32_t * __a, int32x4x4_t val)
25190 __builtin_aarch64_simd_xi __o;
25191 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25192 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25193 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25194 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25195 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25198 __extension__ static __inline void __attribute__ ((__always_inline__))
25199 vst4q_s64 (int64_t * __a, int64x2x4_t val)
25201 __builtin_aarch64_simd_xi __o;
25202 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25203 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25204 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25205 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25206 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25209 __extension__ static __inline void __attribute__ ((__always_inline__))
25210 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
25212 __builtin_aarch64_simd_xi __o;
25213 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
25214 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
25215 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
25216 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
25217 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
25220 __extension__ static __inline void __attribute__ ((__always_inline__))
25221 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
25223 __builtin_aarch64_simd_xi __o;
25224 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
25225 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
25226 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
25227 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
25228 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
25231 __extension__ static __inline void __attribute__ ((__always_inline__))
25232 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
25234 __builtin_aarch64_simd_xi __o;
25235 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
25236 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
25237 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
25238 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
25239 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
25242 __extension__ static __inline void __attribute__ ((__always_inline__))
25243 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
25245 __builtin_aarch64_simd_xi __o;
25246 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
25247 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
25248 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
25249 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
25250 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
25253 __extension__ static __inline void __attribute__ ((__always_inline__))
25254 vst4q_f32 (float32_t * __a, float32x4x4_t val)
25256 __builtin_aarch64_simd_xi __o;
25257 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
25258 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
25259 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
25260 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
25261 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
25264 __extension__ static __inline void __attribute__ ((__always_inline__))
25265 vst4q_f64 (float64_t * __a, float64x2x4_t val)
25267 __builtin_aarch64_simd_xi __o;
25268 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
25269 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
25270 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
25271 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
25272 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
25275 /* vsub */
25277 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25278 vsubd_s64 (int64x1_t __a, int64x1_t __b)
25280 return __a - __b;
25283 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25284 vsubd_u64 (uint64x1_t __a, uint64x1_t __b)
25286 return __a - __b;
25289 /* vtrn */
25291 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
25292 vtrn_f32 (float32x2_t a, float32x2_t b)
25294 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
25297 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
25298 vtrn_p8 (poly8x8_t a, poly8x8_t b)
25300 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
25303 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
25304 vtrn_p16 (poly16x4_t a, poly16x4_t b)
25306 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
25309 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
25310 vtrn_s8 (int8x8_t a, int8x8_t b)
25312 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
25315 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
25316 vtrn_s16 (int16x4_t a, int16x4_t b)
25318 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
25321 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
25322 vtrn_s32 (int32x2_t a, int32x2_t b)
25324 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
25327 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
25328 vtrn_u8 (uint8x8_t a, uint8x8_t b)
25330 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
25333 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
25334 vtrn_u16 (uint16x4_t a, uint16x4_t b)
25336 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
25339 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
25340 vtrn_u32 (uint32x2_t a, uint32x2_t b)
25342 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
25345 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
25346 vtrnq_f32 (float32x4_t a, float32x4_t b)
25348 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
25351 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
25352 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
25354 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
25357 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
25358 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
25360 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
25363 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
25364 vtrnq_s8 (int8x16_t a, int8x16_t b)
25366 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
25369 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
25370 vtrnq_s16 (int16x8_t a, int16x8_t b)
25372 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
25375 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
25376 vtrnq_s32 (int32x4_t a, int32x4_t b)
25378 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
25381 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
25382 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
25384 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
25387 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
25388 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
25390 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
25393 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
25394 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
25396 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
25399 /* vtst */
25401 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25402 vtst_s8 (int8x8_t __a, int8x8_t __b)
25404 return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
25407 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25408 vtst_s16 (int16x4_t __a, int16x4_t __b)
25410 return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
25413 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25414 vtst_s32 (int32x2_t __a, int32x2_t __b)
25416 return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
25419 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25420 vtst_s64 (int64x1_t __a, int64x1_t __b)
25422 return (__a & __b) ? -1ll : 0ll;
25425 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25426 vtst_u8 (uint8x8_t __a, uint8x8_t __b)
25428 return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
25429 (int8x8_t) __b);
25432 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25433 vtst_u16 (uint16x4_t __a, uint16x4_t __b)
25435 return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
25436 (int16x4_t) __b);
25439 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25440 vtst_u32 (uint32x2_t __a, uint32x2_t __b)
25442 return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
25443 (int32x2_t) __b);
25446 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25447 vtst_u64 (uint64x1_t __a, uint64x1_t __b)
25449 return (__a & __b) ? -1ll : 0ll;
25452 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25453 vtstq_s8 (int8x16_t __a, int8x16_t __b)
25455 return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
25458 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25459 vtstq_s16 (int16x8_t __a, int16x8_t __b)
25461 return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
25464 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25465 vtstq_s32 (int32x4_t __a, int32x4_t __b)
25467 return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
25470 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25471 vtstq_s64 (int64x2_t __a, int64x2_t __b)
25473 return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
25476 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25477 vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
25479 return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
25480 (int8x16_t) __b);
25483 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25484 vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
25486 return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
25487 (int16x8_t) __b);
25490 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25491 vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
25493 return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
25494 (int32x4_t) __b);
25497 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25498 vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
25500 return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
25501 (int64x2_t) __b);
25504 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25505 vtstd_s64 (int64x1_t __a, int64x1_t __b)
25507 return (__a & __b) ? -1ll : 0ll;
25510 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
25511 vtstd_u64 (uint64x1_t __a, uint64x1_t __b)
25513 return (__a & __b) ? -1ll : 0ll;
25516 /* vuqadd */
25518 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25519 vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
25521 return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b);
25524 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25525 vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
25527 return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b);
25530 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25531 vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
25533 return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b);
25536 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25537 vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
25539 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25542 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25543 vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
25545 return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b);
25548 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25549 vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
25551 return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b);
25554 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25555 vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
25557 return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b);
25560 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25561 vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
25563 return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b);
25566 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
25567 vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
25569 return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b);
25572 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
25573 vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
25575 return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b);
25578 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
25579 vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
25581 return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b);
25584 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
25585 vuqaddd_s64 (int64x1_t __a, uint64x1_t __b)
25587 return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b);
25590 #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \
25591 __extension__ static __inline rettype \
25592 __attribute__ ((__always_inline__)) \
25593 v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \
25595 return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \
25596 v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \
25599 #define __INTERLEAVE_LIST(op) \
25600 __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \
25601 __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \
25602 __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \
25603 __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \
25604 __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \
25605 __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \
25606 __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \
25607 __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \
25608 __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \
25609 __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \
25610 __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \
25611 __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \
25612 __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \
25613 __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \
25614 __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \
25615 __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \
25616 __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \
25617 __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
25619 /* vuzp */
25621 __INTERLEAVE_LIST (uzp)
25623 /* vzip */
25625 __INTERLEAVE_LIST (zip)
25627 #undef __INTERLEAVE_LIST
25628 #undef __DEFINTERLEAVE
25630 /* End of optimal implementations in approved order. */
25632 #undef __aarch64_vget_lane_any
25633 #undef __aarch64_vget_lane_f32
25634 #undef __aarch64_vget_lane_f64
25635 #undef __aarch64_vget_lane_p8
25636 #undef __aarch64_vget_lane_p16
25637 #undef __aarch64_vget_lane_s8
25638 #undef __aarch64_vget_lane_s16
25639 #undef __aarch64_vget_lane_s32
25640 #undef __aarch64_vget_lane_s64
25641 #undef __aarch64_vget_lane_u8
25642 #undef __aarch64_vget_lane_u16
25643 #undef __aarch64_vget_lane_u32
25644 #undef __aarch64_vget_lane_u64
25646 #undef __aarch64_vgetq_lane_f32
25647 #undef __aarch64_vgetq_lane_f64
25648 #undef __aarch64_vgetq_lane_p8
25649 #undef __aarch64_vgetq_lane_p16
25650 #undef __aarch64_vgetq_lane_s8
25651 #undef __aarch64_vgetq_lane_s16
25652 #undef __aarch64_vgetq_lane_s32
25653 #undef __aarch64_vgetq_lane_s64
25654 #undef __aarch64_vgetq_lane_u8
25655 #undef __aarch64_vgetq_lane_u16
25656 #undef __aarch64_vgetq_lane_u32
25657 #undef __aarch64_vgetq_lane_u64
25659 #endif