[PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests.
[official-gcc.git] / gcc / config / aarch64 / arm_neon.h
blob86926b9bb97963e6a9b858ffe88612f9aff0e0a9
1 /* ARM NEON intrinsics include file.
3 Copyright (C) 2011-2014 Free Software Foundation, Inc.
4 Contributed by ARM Ltd.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 #ifndef _AARCH64_NEON_H_
28 #define _AARCH64_NEON_H_
30 #include <stdint.h>
32 #define __AARCH64_UINT64_C(__C) ((uint64_t) __C)
33 #define __AARCH64_INT64_C(__C) ((int64_t) __C)
35 typedef __builtin_aarch64_simd_qi int8x8_t
36 __attribute__ ((__vector_size__ (8)));
37 typedef __builtin_aarch64_simd_hi int16x4_t
38 __attribute__ ((__vector_size__ (8)));
39 typedef __builtin_aarch64_simd_si int32x2_t
40 __attribute__ ((__vector_size__ (8)));
41 typedef __builtin_aarch64_simd_di int64x1_t
42 __attribute__ ((__vector_size__ (8)));
43 typedef int32_t int32x1_t;
44 typedef int16_t int16x1_t;
45 typedef int8_t int8x1_t;
46 typedef __builtin_aarch64_simd_df float64x1_t
47 __attribute__ ((__vector_size__ (8)));
48 typedef __builtin_aarch64_simd_sf float32x2_t
49 __attribute__ ((__vector_size__ (8)));
50 typedef __builtin_aarch64_simd_poly8 poly8x8_t
51 __attribute__ ((__vector_size__ (8)));
52 typedef __builtin_aarch64_simd_poly16 poly16x4_t
53 __attribute__ ((__vector_size__ (8)));
54 typedef __builtin_aarch64_simd_uqi uint8x8_t
55 __attribute__ ((__vector_size__ (8)));
56 typedef __builtin_aarch64_simd_uhi uint16x4_t
57 __attribute__ ((__vector_size__ (8)));
58 typedef __builtin_aarch64_simd_usi uint32x2_t
59 __attribute__ ((__vector_size__ (8)));
60 typedef __builtin_aarch64_simd_udi uint64x1_t
61 __attribute__ ((__vector_size__ (8)));
62 typedef uint32_t uint32x1_t;
63 typedef uint16_t uint16x1_t;
64 typedef uint8_t uint8x1_t;
65 typedef __builtin_aarch64_simd_qi int8x16_t
66 __attribute__ ((__vector_size__ (16)));
67 typedef __builtin_aarch64_simd_hi int16x8_t
68 __attribute__ ((__vector_size__ (16)));
69 typedef __builtin_aarch64_simd_si int32x4_t
70 __attribute__ ((__vector_size__ (16)));
71 typedef __builtin_aarch64_simd_di int64x2_t
72 __attribute__ ((__vector_size__ (16)));
73 typedef __builtin_aarch64_simd_sf float32x4_t
74 __attribute__ ((__vector_size__ (16)));
75 typedef __builtin_aarch64_simd_df float64x2_t
76 __attribute__ ((__vector_size__ (16)));
77 typedef __builtin_aarch64_simd_poly8 poly8x16_t
78 __attribute__ ((__vector_size__ (16)));
79 typedef __builtin_aarch64_simd_poly16 poly16x8_t
80 __attribute__ ((__vector_size__ (16)));
81 typedef __builtin_aarch64_simd_poly64 poly64x2_t
82 __attribute__ ((__vector_size__ (16)));
83 typedef __builtin_aarch64_simd_uqi uint8x16_t
84 __attribute__ ((__vector_size__ (16)));
85 typedef __builtin_aarch64_simd_uhi uint16x8_t
86 __attribute__ ((__vector_size__ (16)));
87 typedef __builtin_aarch64_simd_usi uint32x4_t
88 __attribute__ ((__vector_size__ (16)));
89 typedef __builtin_aarch64_simd_udi uint64x2_t
90 __attribute__ ((__vector_size__ (16)));
92 typedef float float32_t;
93 typedef double float64_t;
94 typedef __builtin_aarch64_simd_poly8 poly8_t;
95 typedef __builtin_aarch64_simd_poly16 poly16_t;
96 typedef __builtin_aarch64_simd_poly64 poly64_t;
97 typedef __builtin_aarch64_simd_poly128 poly128_t;
99 typedef struct int8x8x2_t
101 int8x8_t val[2];
102 } int8x8x2_t;
104 typedef struct int8x16x2_t
106 int8x16_t val[2];
107 } int8x16x2_t;
109 typedef struct int16x4x2_t
111 int16x4_t val[2];
112 } int16x4x2_t;
114 typedef struct int16x8x2_t
116 int16x8_t val[2];
117 } int16x8x2_t;
119 typedef struct int32x2x2_t
121 int32x2_t val[2];
122 } int32x2x2_t;
124 typedef struct int32x4x2_t
126 int32x4_t val[2];
127 } int32x4x2_t;
129 typedef struct int64x1x2_t
131 int64x1_t val[2];
132 } int64x1x2_t;
134 typedef struct int64x2x2_t
136 int64x2_t val[2];
137 } int64x2x2_t;
139 typedef struct uint8x8x2_t
141 uint8x8_t val[2];
142 } uint8x8x2_t;
144 typedef struct uint8x16x2_t
146 uint8x16_t val[2];
147 } uint8x16x2_t;
149 typedef struct uint16x4x2_t
151 uint16x4_t val[2];
152 } uint16x4x2_t;
154 typedef struct uint16x8x2_t
156 uint16x8_t val[2];
157 } uint16x8x2_t;
159 typedef struct uint32x2x2_t
161 uint32x2_t val[2];
162 } uint32x2x2_t;
164 typedef struct uint32x4x2_t
166 uint32x4_t val[2];
167 } uint32x4x2_t;
169 typedef struct uint64x1x2_t
171 uint64x1_t val[2];
172 } uint64x1x2_t;
174 typedef struct uint64x2x2_t
176 uint64x2_t val[2];
177 } uint64x2x2_t;
179 typedef struct float32x2x2_t
181 float32x2_t val[2];
182 } float32x2x2_t;
184 typedef struct float32x4x2_t
186 float32x4_t val[2];
187 } float32x4x2_t;
189 typedef struct float64x2x2_t
191 float64x2_t val[2];
192 } float64x2x2_t;
194 typedef struct float64x1x2_t
196 float64x1_t val[2];
197 } float64x1x2_t;
199 typedef struct poly8x8x2_t
201 poly8x8_t val[2];
202 } poly8x8x2_t;
204 typedef struct poly8x16x2_t
206 poly8x16_t val[2];
207 } poly8x16x2_t;
209 typedef struct poly16x4x2_t
211 poly16x4_t val[2];
212 } poly16x4x2_t;
214 typedef struct poly16x8x2_t
216 poly16x8_t val[2];
217 } poly16x8x2_t;
219 typedef struct int8x8x3_t
221 int8x8_t val[3];
222 } int8x8x3_t;
224 typedef struct int8x16x3_t
226 int8x16_t val[3];
227 } int8x16x3_t;
229 typedef struct int16x4x3_t
231 int16x4_t val[3];
232 } int16x4x3_t;
234 typedef struct int16x8x3_t
236 int16x8_t val[3];
237 } int16x8x3_t;
239 typedef struct int32x2x3_t
241 int32x2_t val[3];
242 } int32x2x3_t;
244 typedef struct int32x4x3_t
246 int32x4_t val[3];
247 } int32x4x3_t;
249 typedef struct int64x1x3_t
251 int64x1_t val[3];
252 } int64x1x3_t;
254 typedef struct int64x2x3_t
256 int64x2_t val[3];
257 } int64x2x3_t;
259 typedef struct uint8x8x3_t
261 uint8x8_t val[3];
262 } uint8x8x3_t;
264 typedef struct uint8x16x3_t
266 uint8x16_t val[3];
267 } uint8x16x3_t;
269 typedef struct uint16x4x3_t
271 uint16x4_t val[3];
272 } uint16x4x3_t;
274 typedef struct uint16x8x3_t
276 uint16x8_t val[3];
277 } uint16x8x3_t;
279 typedef struct uint32x2x3_t
281 uint32x2_t val[3];
282 } uint32x2x3_t;
284 typedef struct uint32x4x3_t
286 uint32x4_t val[3];
287 } uint32x4x3_t;
289 typedef struct uint64x1x3_t
291 uint64x1_t val[3];
292 } uint64x1x3_t;
294 typedef struct uint64x2x3_t
296 uint64x2_t val[3];
297 } uint64x2x3_t;
299 typedef struct float32x2x3_t
301 float32x2_t val[3];
302 } float32x2x3_t;
304 typedef struct float32x4x3_t
306 float32x4_t val[3];
307 } float32x4x3_t;
309 typedef struct float64x2x3_t
311 float64x2_t val[3];
312 } float64x2x3_t;
314 typedef struct float64x1x3_t
316 float64x1_t val[3];
317 } float64x1x3_t;
319 typedef struct poly8x8x3_t
321 poly8x8_t val[3];
322 } poly8x8x3_t;
324 typedef struct poly8x16x3_t
326 poly8x16_t val[3];
327 } poly8x16x3_t;
329 typedef struct poly16x4x3_t
331 poly16x4_t val[3];
332 } poly16x4x3_t;
334 typedef struct poly16x8x3_t
336 poly16x8_t val[3];
337 } poly16x8x3_t;
339 typedef struct int8x8x4_t
341 int8x8_t val[4];
342 } int8x8x4_t;
344 typedef struct int8x16x4_t
346 int8x16_t val[4];
347 } int8x16x4_t;
349 typedef struct int16x4x4_t
351 int16x4_t val[4];
352 } int16x4x4_t;
354 typedef struct int16x8x4_t
356 int16x8_t val[4];
357 } int16x8x4_t;
359 typedef struct int32x2x4_t
361 int32x2_t val[4];
362 } int32x2x4_t;
364 typedef struct int32x4x4_t
366 int32x4_t val[4];
367 } int32x4x4_t;
369 typedef struct int64x1x4_t
371 int64x1_t val[4];
372 } int64x1x4_t;
374 typedef struct int64x2x4_t
376 int64x2_t val[4];
377 } int64x2x4_t;
379 typedef struct uint8x8x4_t
381 uint8x8_t val[4];
382 } uint8x8x4_t;
384 typedef struct uint8x16x4_t
386 uint8x16_t val[4];
387 } uint8x16x4_t;
389 typedef struct uint16x4x4_t
391 uint16x4_t val[4];
392 } uint16x4x4_t;
394 typedef struct uint16x8x4_t
396 uint16x8_t val[4];
397 } uint16x8x4_t;
399 typedef struct uint32x2x4_t
401 uint32x2_t val[4];
402 } uint32x2x4_t;
404 typedef struct uint32x4x4_t
406 uint32x4_t val[4];
407 } uint32x4x4_t;
409 typedef struct uint64x1x4_t
411 uint64x1_t val[4];
412 } uint64x1x4_t;
414 typedef struct uint64x2x4_t
416 uint64x2_t val[4];
417 } uint64x2x4_t;
419 typedef struct float32x2x4_t
421 float32x2_t val[4];
422 } float32x2x4_t;
424 typedef struct float32x4x4_t
426 float32x4_t val[4];
427 } float32x4x4_t;
429 typedef struct float64x2x4_t
431 float64x2_t val[4];
432 } float64x2x4_t;
434 typedef struct float64x1x4_t
436 float64x1_t val[4];
437 } float64x1x4_t;
439 typedef struct poly8x8x4_t
441 poly8x8_t val[4];
442 } poly8x8x4_t;
444 typedef struct poly8x16x4_t
446 poly8x16_t val[4];
447 } poly8x16x4_t;
449 typedef struct poly16x4x4_t
451 poly16x4_t val[4];
452 } poly16x4x4_t;
454 typedef struct poly16x8x4_t
456 poly16x8_t val[4];
457 } poly16x8x4_t;
/* vget_lane internal macros.  */

/* Extract lane __b from vector __a.  __cast_a converts __a to the
   (signed) vector type the builtin expects; __cast_ret converts the
   scalar result back to the intrinsic's element type.  The "be_checked"
   builtin also range-checks the lane index at compile time.  */
#define __aarch64_vget_lane_any(__size, __cast_ret, __cast_a, __a, __b) \
  (__cast_ret \
     __builtin_aarch64_be_checked_get_lane##__size (__cast_a __a, __b))

#define __aarch64_vget_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v2sf, , , __a, __b)
/* Single-lane vectors have no builtin; validate the index then read
   element 0 directly.  */
#define __aarch64_vget_lane_f64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (poly8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (poly16_t), (int16x4_t), __a, __b)

#define __aarch64_vget_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, , ,__a, __b)
#define __aarch64_vget_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, , ,__a, __b)
#define __aarch64_vget_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v2si, , ,__a, __b)
#define __aarch64_vget_lane_s64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

#define __aarch64_vget_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v8qi, (uint8_t), (int8x8_t), __a, __b)
#define __aarch64_vget_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v4hi, (uint16_t), (int16x4_t), __a, __b)
#define __aarch64_vget_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v2si, (uint32_t), (int32x2_t), __a, __b)
#define __aarch64_vget_lane_u64(__a, __b) __extension__ \
  ({ \
    __builtin_aarch64_im_lane_boundsi (__b, 1); \
    __a[0]; \
  })

/* 128-bit (Q register) variants.  */
#define __aarch64_vgetq_lane_f32(__a, __b) \
  __aarch64_vget_lane_any (v4sf, , , __a, __b)
#define __aarch64_vgetq_lane_f64(__a, __b) \
  __aarch64_vget_lane_any (v2df, , , __a, __b)

#define __aarch64_vgetq_lane_p8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (poly8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_p16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (poly16_t), (int16x8_t), __a, __b)

#define __aarch64_vgetq_lane_s8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, , ,__a, __b)
#define __aarch64_vgetq_lane_s16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, , ,__a, __b)
#define __aarch64_vgetq_lane_s32(__a, __b) \
  __aarch64_vget_lane_any (v4si, , ,__a, __b)
#define __aarch64_vgetq_lane_s64(__a, __b) \
  __aarch64_vget_lane_any (v2di, , ,__a, __b)

#define __aarch64_vgetq_lane_u8(__a, __b) \
  __aarch64_vget_lane_any (v16qi, (uint8_t), (int8x16_t), __a, __b)
#define __aarch64_vgetq_lane_u16(__a, __b) \
  __aarch64_vget_lane_any (v8hi, (uint16_t), (int16x8_t), __a, __b)
#define __aarch64_vgetq_lane_u32(__a, __b) \
  __aarch64_vget_lane_any (v4si, (uint32_t), (int32x4_t), __a, __b)
#define __aarch64_vgetq_lane_u64(__a, __b) \
  __aarch64_vget_lane_any (v2di, (uint64_t), (int64x2_t), __a, __b)
/* __aarch64_vdup_lane internal macros.  */

/* Duplicate lane __b of vector __a across a new vector: extract the
   lane (optionally from a Q register, __q2) and broadcast it with the
   matching vdup_n (optionally to a Q register, __q1).  */
#define __aarch64_vdup_lane_any(__size, __q1, __q2, __a, __b) \
  vdup##__q1##_n_##__size (__aarch64_vget##__q2##_lane_##__size (__a, __b))

#define __aarch64_vdup_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , , __a, __b)
#define __aarch64_vdup_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , , __a, __b)
#define __aarch64_vdup_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , , __a, __b)
#define __aarch64_vdup_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , , __a, __b)
#define __aarch64_vdup_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , , __a, __b)
#define __aarch64_vdup_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , , __a, __b)
#define __aarch64_vdup_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , , __a, __b)
#define __aarch64_vdup_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , , __a, __b)
#define __aarch64_vdup_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , , __a, __b)
#define __aarch64_vdup_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , , __a, __b)
#define __aarch64_vdup_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , , __a, __b)
#define __aarch64_vdup_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , , __a, __b)

/* __aarch64_vdup_laneq internal macros: D-register result, lane taken
   from a Q register.  */
#define __aarch64_vdup_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, , q, __a, __b)
#define __aarch64_vdup_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, , q, __a, __b)
#define __aarch64_vdup_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, , q, __a, __b)
#define __aarch64_vdup_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, , q, __a, __b)
#define __aarch64_vdup_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, , q, __a, __b)
#define __aarch64_vdup_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, , q, __a, __b)
#define __aarch64_vdup_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, , q, __a, __b)
#define __aarch64_vdup_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, , q, __a, __b)
#define __aarch64_vdup_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, , q, __a, __b)
#define __aarch64_vdup_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, , q, __a, __b)
#define __aarch64_vdup_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, , q, __a, __b)
#define __aarch64_vdup_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, , q, __a, __b)

/* __aarch64_vdupq_lane internal macros: Q-register result, lane taken
   from a D register.  */
#define __aarch64_vdupq_lane_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, , __a, __b)
#define __aarch64_vdupq_lane_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, , __a, __b)
#define __aarch64_vdupq_lane_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, , __a, __b)
#define __aarch64_vdupq_lane_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, , __a, __b)
#define __aarch64_vdupq_lane_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, , __a, __b)
#define __aarch64_vdupq_lane_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, , __a, __b)
#define __aarch64_vdupq_lane_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, , __a, __b)
#define __aarch64_vdupq_lane_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, , __a, __b)
#define __aarch64_vdupq_lane_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, , __a, __b)
#define __aarch64_vdupq_lane_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, , __a, __b)
#define __aarch64_vdupq_lane_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, , __a, __b)
#define __aarch64_vdupq_lane_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, , __a, __b)

/* __aarch64_vdupq_laneq internal macros: Q-register result, lane taken
   from a Q register.  */
#define __aarch64_vdupq_laneq_f32(__a, __b) \
   __aarch64_vdup_lane_any (f32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_f64(__a, __b) \
   __aarch64_vdup_lane_any (f64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p8(__a, __b) \
   __aarch64_vdup_lane_any (p8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_p16(__a, __b) \
   __aarch64_vdup_lane_any (p16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s8(__a, __b) \
   __aarch64_vdup_lane_any (s8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s16(__a, __b) \
   __aarch64_vdup_lane_any (s16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s32(__a, __b) \
   __aarch64_vdup_lane_any (s32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_s64(__a, __b) \
   __aarch64_vdup_lane_any (s64, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u8(__a, __b) \
   __aarch64_vdup_lane_any (u8, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u16(__a, __b) \
   __aarch64_vdup_lane_any (u16, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u32(__a, __b) \
   __aarch64_vdup_lane_any (u32, q, q, __a, __b)
#define __aarch64_vdupq_laneq_u64(__a, __b) \
   __aarch64_vdup_lane_any (u64, q, q, __a, __b)
637 /* vadd */
638 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
639 vadd_s8 (int8x8_t __a, int8x8_t __b)
641 return __a + __b;
644 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
645 vadd_s16 (int16x4_t __a, int16x4_t __b)
647 return __a + __b;
650 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
651 vadd_s32 (int32x2_t __a, int32x2_t __b)
653 return __a + __b;
656 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
657 vadd_f32 (float32x2_t __a, float32x2_t __b)
659 return __a + __b;
662 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
663 vadd_f64 (float64x1_t __a, float64x1_t __b)
665 return __a + __b;
668 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
669 vadd_u8 (uint8x8_t __a, uint8x8_t __b)
671 return __a + __b;
674 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
675 vadd_u16 (uint16x4_t __a, uint16x4_t __b)
677 return __a + __b;
680 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
681 vadd_u32 (uint32x2_t __a, uint32x2_t __b)
683 return __a + __b;
686 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
687 vadd_s64 (int64x1_t __a, int64x1_t __b)
689 return __a + __b;
692 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
693 vadd_u64 (uint64x1_t __a, uint64x1_t __b)
695 return __a + __b;
698 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
699 vaddq_s8 (int8x16_t __a, int8x16_t __b)
701 return __a + __b;
704 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
705 vaddq_s16 (int16x8_t __a, int16x8_t __b)
707 return __a + __b;
710 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
711 vaddq_s32 (int32x4_t __a, int32x4_t __b)
713 return __a + __b;
716 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
717 vaddq_s64 (int64x2_t __a, int64x2_t __b)
719 return __a + __b;
722 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
723 vaddq_f32 (float32x4_t __a, float32x4_t __b)
725 return __a + __b;
728 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
729 vaddq_f64 (float64x2_t __a, float64x2_t __b)
731 return __a + __b;
734 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
735 vaddq_u8 (uint8x16_t __a, uint8x16_t __b)
737 return __a + __b;
740 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
741 vaddq_u16 (uint16x8_t __a, uint16x8_t __b)
743 return __a + __b;
746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
747 vaddq_u32 (uint32x4_t __a, uint32x4_t __b)
749 return __a + __b;
752 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
753 vaddq_u64 (uint64x2_t __a, uint64x2_t __b)
755 return __a + __b;
758 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
759 vaddl_s8 (int8x8_t __a, int8x8_t __b)
761 return (int16x8_t) __builtin_aarch64_saddlv8qi (__a, __b);
764 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
765 vaddl_s16 (int16x4_t __a, int16x4_t __b)
767 return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b);
770 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
771 vaddl_s32 (int32x2_t __a, int32x2_t __b)
773 return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b);
776 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
777 vaddl_u8 (uint8x8_t __a, uint8x8_t __b)
779 return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a,
780 (int8x8_t) __b);
783 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
784 vaddl_u16 (uint16x4_t __a, uint16x4_t __b)
786 return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a,
787 (int16x4_t) __b);
790 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
791 vaddl_u32 (uint32x2_t __a, uint32x2_t __b)
793 return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a,
794 (int32x2_t) __b);
797 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
798 vaddl_high_s8 (int8x16_t __a, int8x16_t __b)
800 return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b);
803 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
804 vaddl_high_s16 (int16x8_t __a, int16x8_t __b)
806 return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b);
809 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
810 vaddl_high_s32 (int32x4_t __a, int32x4_t __b)
812 return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b);
815 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
816 vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b)
818 return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a,
819 (int8x16_t) __b);
822 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
823 vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b)
825 return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a,
826 (int16x8_t) __b);
829 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
830 vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b)
832 return (uint64x2_t) __builtin_aarch64_uaddl2v4si ((int32x4_t) __a,
833 (int32x4_t) __b);
836 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
837 vaddw_s8 (int16x8_t __a, int8x8_t __b)
839 return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b);
842 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
843 vaddw_s16 (int32x4_t __a, int16x4_t __b)
845 return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b);
848 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
849 vaddw_s32 (int64x2_t __a, int32x2_t __b)
851 return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b);
854 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
855 vaddw_u8 (uint16x8_t __a, uint8x8_t __b)
857 return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a,
858 (int8x8_t) __b);
861 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
862 vaddw_u16 (uint32x4_t __a, uint16x4_t __b)
864 return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a,
865 (int16x4_t) __b);
868 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
869 vaddw_u32 (uint64x2_t __a, uint32x2_t __b)
871 return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a,
872 (int32x2_t) __b);
875 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
876 vaddw_high_s8 (int16x8_t __a, int8x16_t __b)
878 return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b);
881 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
882 vaddw_high_s16 (int32x4_t __a, int16x8_t __b)
884 return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b);
887 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
888 vaddw_high_s32 (int64x2_t __a, int32x4_t __b)
890 return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b);
893 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
894 vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b)
896 return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a,
897 (int8x16_t) __b);
900 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
901 vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b)
903 return (uint32x4_t) __builtin_aarch64_uaddw2v8hi ((int32x4_t) __a,
904 (int16x8_t) __b);
907 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
908 vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b)
910 return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a,
911 (int32x4_t) __b);
914 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
915 vhadd_s8 (int8x8_t __a, int8x8_t __b)
917 return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b);
920 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
921 vhadd_s16 (int16x4_t __a, int16x4_t __b)
923 return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b);
926 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
927 vhadd_s32 (int32x2_t __a, int32x2_t __b)
929 return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b);
932 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
933 vhadd_u8 (uint8x8_t __a, uint8x8_t __b)
935 return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a,
936 (int8x8_t) __b);
939 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
940 vhadd_u16 (uint16x4_t __a, uint16x4_t __b)
942 return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a,
943 (int16x4_t) __b);
946 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
947 vhadd_u32 (uint32x2_t __a, uint32x2_t __b)
949 return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a,
950 (int32x2_t) __b);
953 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
954 vhaddq_s8 (int8x16_t __a, int8x16_t __b)
956 return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b);
959 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
960 vhaddq_s16 (int16x8_t __a, int16x8_t __b)
962 return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b);
965 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
966 vhaddq_s32 (int32x4_t __a, int32x4_t __b)
968 return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b);
971 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
972 vhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
974 return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a,
975 (int8x16_t) __b);
978 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
979 vhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
981 return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a,
982 (int16x8_t) __b);
985 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
986 vhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
988 return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a,
989 (int32x4_t) __b);
992 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
993 vrhadd_s8 (int8x8_t __a, int8x8_t __b)
995 return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b);
998 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
999 vrhadd_s16 (int16x4_t __a, int16x4_t __b)
1001 return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b);
1004 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1005 vrhadd_s32 (int32x2_t __a, int32x2_t __b)
1007 return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b);
1010 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1011 vrhadd_u8 (uint8x8_t __a, uint8x8_t __b)
1013 return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a,
1014 (int8x8_t) __b);
1017 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1018 vrhadd_u16 (uint16x4_t __a, uint16x4_t __b)
1020 return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a,
1021 (int16x4_t) __b);
1024 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1025 vrhadd_u32 (uint32x2_t __a, uint32x2_t __b)
1027 return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a,
1028 (int32x2_t) __b);
1031 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1032 vrhaddq_s8 (int8x16_t __a, int8x16_t __b)
1034 return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b);
1037 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1038 vrhaddq_s16 (int16x8_t __a, int16x8_t __b)
1040 return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b);
1043 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1044 vrhaddq_s32 (int32x4_t __a, int32x4_t __b)
1046 return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b);
1049 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1050 vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b)
1052 return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a,
1053 (int8x16_t) __b);
1056 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1057 vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b)
1059 return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a,
1060 (int16x8_t) __b);
1063 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1064 vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b)
1066 return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a,
1067 (int32x4_t) __b);
1070 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1071 vaddhn_s16 (int16x8_t __a, int16x8_t __b)
1073 return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b);
1076 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1077 vaddhn_s32 (int32x4_t __a, int32x4_t __b)
1079 return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b);
1082 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1083 vaddhn_s64 (int64x2_t __a, int64x2_t __b)
1085 return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b);
1088 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1089 vaddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1091 return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a,
1092 (int16x8_t) __b);
1095 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1096 vaddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1098 return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a,
1099 (int32x4_t) __b);
1102 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1103 vaddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1105 return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a,
1106 (int64x2_t) __b);
/* vraddhn_<t>: rounding variant of vaddhn — add lanes, round, and return
   the narrowed high half of each sum (ACLE RADDHN).  Unsigned variants
   bit-cast through the signed-typed builtin.  */
1109 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1110 vraddhn_s16 (int16x8_t __a, int16x8_t __b)
1112 return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b);
1115 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1116 vraddhn_s32 (int32x4_t __a, int32x4_t __b)
1118 return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b);
1121 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1122 vraddhn_s64 (int64x2_t __a, int64x2_t __b)
1124 return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b);
1127 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1128 vraddhn_u16 (uint16x8_t __a, uint16x8_t __b)
1130 return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a,
1131 (int16x8_t) __b);
1134 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1135 vraddhn_u32 (uint32x4_t __a, uint32x4_t __b)
1137 return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a,
1138 (int32x4_t) __b);
1141 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1142 vraddhn_u64 (uint64x2_t __a, uint64x2_t __b)
1144 return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a,
1145 (int64x2_t) __b);
/* vaddhn_high_<t>: ADDHN2 — narrow the high half of each lane of
   __b + __c into the upper half of the 128-bit result; the lower half of
   the result is taken from the narrow vector __a.  Unsigned variants
   bit-cast through the signed-typed builtin.  */
1148 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1149 vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1151 return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c);
1154 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1155 vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1157 return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c);
1160 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1161 vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1163 return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c);
1166 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1167 vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1169 return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a,
1170 (int16x8_t) __b,
1171 (int16x8_t) __c);
1174 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1175 vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1177 return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a,
1178 (int32x4_t) __b,
1179 (int32x4_t) __c);
1182 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1183 vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1185 return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a,
1186 (int64x2_t) __b,
1187 (int64x2_t) __c);
/* vraddhn_high_<t>: RADDHN2 — rounding variant of vaddhn_high; narrow the
   rounded high half of each lane of __b + __c into the upper half of the
   result, keeping __a as the lower half.  */
1190 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1191 vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c)
1193 return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c);
1196 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1197 vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c)
1199 return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c);
1202 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1203 vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c)
1205 return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c);
1208 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1209 vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c)
1211 return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a,
1212 (int16x8_t) __b,
1213 (int16x8_t) __c);
1216 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1217 vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c)
1219 return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a,
1220 (int32x4_t) __b,
1221 (int32x4_t) __c);
1224 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1225 vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c)
1227 return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a,
1228 (int64x2_t) __b,
1229 (int64x2_t) __c);
/* vdiv(q?)_f<n>: lane-wise floating-point division, expressed directly
   with the GNU vector-extension '/' operator (AArch64-only; AArch32 NEON
   has no vector FP divide intrinsics).  */
1232 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1233 vdiv_f32 (float32x2_t __a, float32x2_t __b)
1235 return __a / __b;
1238 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1239 vdiv_f64 (float64x1_t __a, float64x1_t __b)
1241 return __a / __b;
1244 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1245 vdivq_f32 (float32x4_t __a, float32x4_t __b)
1247 return __a / __b;
1250 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1251 vdivq_f64 (float64x2_t __a, float64x2_t __b)
1253 return __a / __b;
/* vmul(q?)_<t>: lane-wise multiply.  Integer and FP variants use the GNU
   vector '*' operator; the poly8 variants map to the PMUL builtin
   (polynomial, i.e. carry-less, multiply per ACLE), bit-casting the
   poly operands through the signed vector types the builtin expects.  */
1256 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1257 vmul_s8 (int8x8_t __a, int8x8_t __b)
1259 return __a * __b;
1262 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1263 vmul_s16 (int16x4_t __a, int16x4_t __b)
1265 return __a * __b;
1268 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1269 vmul_s32 (int32x2_t __a, int32x2_t __b)
1271 return __a * __b;
1274 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1275 vmul_f32 (float32x2_t __a, float32x2_t __b)
1277 return __a * __b;
1280 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1281 vmul_f64 (float64x1_t __a, float64x1_t __b)
1283 return __a * __b;
1286 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1287 vmul_u8 (uint8x8_t __a, uint8x8_t __b)
1289 return __a * __b;
1292 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1293 vmul_u16 (uint16x4_t __a, uint16x4_t __b)
1295 return __a * __b;
1298 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1299 vmul_u32 (uint32x2_t __a, uint32x2_t __b)
1301 return __a * __b;
1304 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
1305 vmul_p8 (poly8x8_t __a, poly8x8_t __b)
1307 return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a,
1308 (int8x8_t) __b);
1311 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1312 vmulq_s8 (int8x16_t __a, int8x16_t __b)
1314 return __a * __b;
1317 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1318 vmulq_s16 (int16x8_t __a, int16x8_t __b)
1320 return __a * __b;
1323 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1324 vmulq_s32 (int32x4_t __a, int32x4_t __b)
1326 return __a * __b;
1329 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1330 vmulq_f32 (float32x4_t __a, float32x4_t __b)
1332 return __a * __b;
1335 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1336 vmulq_f64 (float64x2_t __a, float64x2_t __b)
1338 return __a * __b;
1341 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1342 vmulq_u8 (uint8x16_t __a, uint8x16_t __b)
1344 return __a * __b;
1347 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1348 vmulq_u16 (uint16x8_t __a, uint16x8_t __b)
1350 return __a * __b;
1353 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1354 vmulq_u32 (uint32x4_t __a, uint32x4_t __b)
1356 return __a * __b;
1359 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
1360 vmulq_p8 (poly8x16_t __a, poly8x16_t __b)
1362 return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a,
1363 (int8x16_t) __b);
/* vand(q?)_<t>: lane-wise bitwise AND, written with the GNU vector '&'
   operator so GCC can fold/optimize it like ordinary arithmetic.  */
1366 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1367 vand_s8 (int8x8_t __a, int8x8_t __b)
1369 return __a & __b;
1372 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1373 vand_s16 (int16x4_t __a, int16x4_t __b)
1375 return __a & __b;
1378 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1379 vand_s32 (int32x2_t __a, int32x2_t __b)
1381 return __a & __b;
1384 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1385 vand_u8 (uint8x8_t __a, uint8x8_t __b)
1387 return __a & __b;
1390 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1391 vand_u16 (uint16x4_t __a, uint16x4_t __b)
1393 return __a & __b;
1396 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1397 vand_u32 (uint32x2_t __a, uint32x2_t __b)
1399 return __a & __b;
1402 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1403 vand_s64 (int64x1_t __a, int64x1_t __b)
1405 return __a & __b;
1408 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1409 vand_u64 (uint64x1_t __a, uint64x1_t __b)
1411 return __a & __b;
1414 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1415 vandq_s8 (int8x16_t __a, int8x16_t __b)
1417 return __a & __b;
1420 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1421 vandq_s16 (int16x8_t __a, int16x8_t __b)
1423 return __a & __b;
1426 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1427 vandq_s32 (int32x4_t __a, int32x4_t __b)
1429 return __a & __b;
1432 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1433 vandq_s64 (int64x2_t __a, int64x2_t __b)
1435 return __a & __b;
1438 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1439 vandq_u8 (uint8x16_t __a, uint8x16_t __b)
1441 return __a & __b;
1444 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1445 vandq_u16 (uint16x8_t __a, uint16x8_t __b)
1447 return __a & __b;
1450 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1451 vandq_u32 (uint32x4_t __a, uint32x4_t __b)
1453 return __a & __b;
1456 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1457 vandq_u64 (uint64x2_t __a, uint64x2_t __b)
1459 return __a & __b;
/* vorr(q?)_<t>: lane-wise bitwise inclusive OR via the GNU vector '|'
   operator.  */
1462 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1463 vorr_s8 (int8x8_t __a, int8x8_t __b)
1465 return __a | __b;
1468 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1469 vorr_s16 (int16x4_t __a, int16x4_t __b)
1471 return __a | __b;
1474 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1475 vorr_s32 (int32x2_t __a, int32x2_t __b)
1477 return __a | __b;
1480 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1481 vorr_u8 (uint8x8_t __a, uint8x8_t __b)
1483 return __a | __b;
1486 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1487 vorr_u16 (uint16x4_t __a, uint16x4_t __b)
1489 return __a | __b;
1492 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1493 vorr_u32 (uint32x2_t __a, uint32x2_t __b)
1495 return __a | __b;
1498 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1499 vorr_s64 (int64x1_t __a, int64x1_t __b)
1501 return __a | __b;
1504 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1505 vorr_u64 (uint64x1_t __a, uint64x1_t __b)
1507 return __a | __b;
1510 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1511 vorrq_s8 (int8x16_t __a, int8x16_t __b)
1513 return __a | __b;
1516 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1517 vorrq_s16 (int16x8_t __a, int16x8_t __b)
1519 return __a | __b;
1522 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1523 vorrq_s32 (int32x4_t __a, int32x4_t __b)
1525 return __a | __b;
1528 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1529 vorrq_s64 (int64x2_t __a, int64x2_t __b)
1531 return __a | __b;
1534 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1535 vorrq_u8 (uint8x16_t __a, uint8x16_t __b)
1537 return __a | __b;
1540 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1541 vorrq_u16 (uint16x8_t __a, uint16x8_t __b)
1543 return __a | __b;
1546 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1547 vorrq_u32 (uint32x4_t __a, uint32x4_t __b)
1549 return __a | __b;
1552 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1553 vorrq_u64 (uint64x2_t __a, uint64x2_t __b)
1555 return __a | __b;
/* veor(q?)_<t>: lane-wise bitwise exclusive OR via the GNU vector '^'
   operator.  */
1558 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1559 veor_s8 (int8x8_t __a, int8x8_t __b)
1561 return __a ^ __b;
1564 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1565 veor_s16 (int16x4_t __a, int16x4_t __b)
1567 return __a ^ __b;
1570 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1571 veor_s32 (int32x2_t __a, int32x2_t __b)
1573 return __a ^ __b;
1576 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1577 veor_u8 (uint8x8_t __a, uint8x8_t __b)
1579 return __a ^ __b;
1582 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1583 veor_u16 (uint16x4_t __a, uint16x4_t __b)
1585 return __a ^ __b;
1588 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1589 veor_u32 (uint32x2_t __a, uint32x2_t __b)
1591 return __a ^ __b;
1594 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1595 veor_s64 (int64x1_t __a, int64x1_t __b)
1597 return __a ^ __b;
1600 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1601 veor_u64 (uint64x1_t __a, uint64x1_t __b)
1603 return __a ^ __b;
1606 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1607 veorq_s8 (int8x16_t __a, int8x16_t __b)
1609 return __a ^ __b;
1612 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1613 veorq_s16 (int16x8_t __a, int16x8_t __b)
1615 return __a ^ __b;
1618 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1619 veorq_s32 (int32x4_t __a, int32x4_t __b)
1621 return __a ^ __b;
1624 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1625 veorq_s64 (int64x2_t __a, int64x2_t __b)
1627 return __a ^ __b;
1630 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1631 veorq_u8 (uint8x16_t __a, uint8x16_t __b)
1633 return __a ^ __b;
1636 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1637 veorq_u16 (uint16x8_t __a, uint16x8_t __b)
1639 return __a ^ __b;
1642 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1643 veorq_u32 (uint32x4_t __a, uint32x4_t __b)
1645 return __a ^ __b;
1648 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1649 veorq_u64 (uint64x2_t __a, uint64x2_t __b)
1651 return __a ^ __b;
/* vbic(q?)_<t>: bit clear — lane-wise __a AND NOT __b (BIC), i.e. clear
   in __a every bit that is set in __b.  Note the operand order: the
   second argument is the mask that is inverted.  */
1654 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1655 vbic_s8 (int8x8_t __a, int8x8_t __b)
1657 return __a & ~__b;
1660 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1661 vbic_s16 (int16x4_t __a, int16x4_t __b)
1663 return __a & ~__b;
1666 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1667 vbic_s32 (int32x2_t __a, int32x2_t __b)
1669 return __a & ~__b;
1672 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1673 vbic_u8 (uint8x8_t __a, uint8x8_t __b)
1675 return __a & ~__b;
1678 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1679 vbic_u16 (uint16x4_t __a, uint16x4_t __b)
1681 return __a & ~__b;
1684 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1685 vbic_u32 (uint32x2_t __a, uint32x2_t __b)
1687 return __a & ~__b;
1690 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1691 vbic_s64 (int64x1_t __a, int64x1_t __b)
1693 return __a & ~__b;
1696 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1697 vbic_u64 (uint64x1_t __a, uint64x1_t __b)
1699 return __a & ~__b;
1702 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1703 vbicq_s8 (int8x16_t __a, int8x16_t __b)
1705 return __a & ~__b;
1708 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1709 vbicq_s16 (int16x8_t __a, int16x8_t __b)
1711 return __a & ~__b;
1714 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1715 vbicq_s32 (int32x4_t __a, int32x4_t __b)
1717 return __a & ~__b;
1720 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1721 vbicq_s64 (int64x2_t __a, int64x2_t __b)
1723 return __a & ~__b;
1726 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1727 vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
1729 return __a & ~__b;
1732 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1733 vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
1735 return __a & ~__b;
1738 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1739 vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
1741 return __a & ~__b;
1744 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1745 vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
1747 return __a & ~__b;
/* vorn(q?)_<t>: OR-NOT — lane-wise __a OR NOT __b (ORN).  As with vbic,
   it is the second operand that gets inverted.  */
1750 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1751 vorn_s8 (int8x8_t __a, int8x8_t __b)
1753 return __a | ~__b;
1756 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1757 vorn_s16 (int16x4_t __a, int16x4_t __b)
1759 return __a | ~__b;
1762 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1763 vorn_s32 (int32x2_t __a, int32x2_t __b)
1765 return __a | ~__b;
1768 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1769 vorn_u8 (uint8x8_t __a, uint8x8_t __b)
1771 return __a | ~__b;
1774 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1775 vorn_u16 (uint16x4_t __a, uint16x4_t __b)
1777 return __a | ~__b;
1780 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1781 vorn_u32 (uint32x2_t __a, uint32x2_t __b)
1783 return __a | ~__b;
1786 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1787 vorn_s64 (int64x1_t __a, int64x1_t __b)
1789 return __a | ~__b;
1792 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1793 vorn_u64 (uint64x1_t __a, uint64x1_t __b)
1795 return __a | ~__b;
1798 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1799 vornq_s8 (int8x16_t __a, int8x16_t __b)
1801 return __a | ~__b;
1804 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1805 vornq_s16 (int16x8_t __a, int16x8_t __b)
1807 return __a | ~__b;
1810 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1811 vornq_s32 (int32x4_t __a, int32x4_t __b)
1813 return __a | ~__b;
1816 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1817 vornq_s64 (int64x2_t __a, int64x2_t __b)
1819 return __a | ~__b;
1822 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1823 vornq_u8 (uint8x16_t __a, uint8x16_t __b)
1825 return __a | ~__b;
1828 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1829 vornq_u16 (uint16x8_t __a, uint16x8_t __b)
1831 return __a | ~__b;
1834 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1835 vornq_u32 (uint32x4_t __a, uint32x4_t __b)
1837 return __a | ~__b;
1840 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1841 vornq_u64 (uint64x2_t __a, uint64x2_t __b)
1843 return __a | ~__b;
/* vsub(q?)_<t>: lane-wise subtraction via the GNU vector '-' operator.
   Integer lanes wrap modulo 2^width (vector ops do not trap on
   overflow); see vqsub below for the saturating forms.  */
1846 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
1847 vsub_s8 (int8x8_t __a, int8x8_t __b)
1849 return __a - __b;
1852 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
1853 vsub_s16 (int16x4_t __a, int16x4_t __b)
1855 return __a - __b;
1858 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
1859 vsub_s32 (int32x2_t __a, int32x2_t __b)
1861 return __a - __b;
1864 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
1865 vsub_f32 (float32x2_t __a, float32x2_t __b)
1867 return __a - __b;
1870 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
1871 vsub_f64 (float64x1_t __a, float64x1_t __b)
1873 return __a - __b;
1876 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
1877 vsub_u8 (uint8x8_t __a, uint8x8_t __b)
1879 return __a - __b;
1882 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
1883 vsub_u16 (uint16x4_t __a, uint16x4_t __b)
1885 return __a - __b;
1888 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
1889 vsub_u32 (uint32x2_t __a, uint32x2_t __b)
1891 return __a - __b;
1894 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
1895 vsub_s64 (int64x1_t __a, int64x1_t __b)
1897 return __a - __b;
1900 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
1901 vsub_u64 (uint64x1_t __a, uint64x1_t __b)
1903 return __a - __b;
1906 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
1907 vsubq_s8 (int8x16_t __a, int8x16_t __b)
1909 return __a - __b;
1912 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1913 vsubq_s16 (int16x8_t __a, int16x8_t __b)
1915 return __a - __b;
1918 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1919 vsubq_s32 (int32x4_t __a, int32x4_t __b)
1921 return __a - __b;
1924 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1925 vsubq_s64 (int64x2_t __a, int64x2_t __b)
1927 return __a - __b;
1930 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
1931 vsubq_f32 (float32x4_t __a, float32x4_t __b)
1933 return __a - __b;
1936 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
1937 vsubq_f64 (float64x2_t __a, float64x2_t __b)
1939 return __a - __b;
1942 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
1943 vsubq_u8 (uint8x16_t __a, uint8x16_t __b)
1945 return __a - __b;
1948 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1949 vsubq_u16 (uint16x8_t __a, uint16x8_t __b)
1951 return __a - __b;
1954 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1955 vsubq_u32 (uint32x4_t __a, uint32x4_t __b)
1957 return __a - __b;
1960 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1961 vsubq_u64 (uint64x2_t __a, uint64x2_t __b)
1963 return __a - __b;
/* vsubl_<t>: widening (long) subtract — subtract two 64-bit narrow
   vectors producing a 128-bit vector with lanes twice as wide
   (ACLE SSUBL/USUBL).  Unsigned variants bit-cast through the
   signed-typed builtin.  */
1966 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
1967 vsubl_s8 (int8x8_t __a, int8x8_t __b)
1969 return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b);
1972 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
1973 vsubl_s16 (int16x4_t __a, int16x4_t __b)
1975 return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b);
1978 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
1979 vsubl_s32 (int32x2_t __a, int32x2_t __b)
1981 return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b);
1984 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
1985 vsubl_u8 (uint8x8_t __a, uint8x8_t __b)
1987 return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a,
1988 (int8x8_t) __b);
1991 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
1992 vsubl_u16 (uint16x4_t __a, uint16x4_t __b)
1994 return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a,
1995 (int16x4_t) __b);
1998 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
1999 vsubl_u32 (uint32x2_t __a, uint32x2_t __b)
2001 return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a,
2002 (int32x2_t) __b);
/* vsubl_high_<t>: SSUBL2/USUBL2 — widening subtract applied to the high
   halves of two 128-bit inputs, producing a full-width result.  */
2005 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2006 vsubl_high_s8 (int8x16_t __a, int8x16_t __b)
2008 return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b);
2011 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2012 vsubl_high_s16 (int16x8_t __a, int16x8_t __b)
2014 return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b);
2017 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2018 vsubl_high_s32 (int32x4_t __a, int32x4_t __b)
2020 return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b);
2023 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2024 vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b)
2026 return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a,
2027 (int8x16_t) __b);
2030 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2031 vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b)
2033 return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a,
2034 (int16x8_t) __b);
2037 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2038 vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b)
2040 return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a,
2041 (int32x4_t) __b);
/* vsubw_<t>: wide subtract — subtract a narrow vector __b, widened
   lane-by-lane, from an already-wide vector __a (ACLE SSUBW/USUBW).  */
2044 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2045 vsubw_s8 (int16x8_t __a, int8x8_t __b)
2047 return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b);
2050 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2051 vsubw_s16 (int32x4_t __a, int16x4_t __b)
2053 return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b);
2056 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2057 vsubw_s32 (int64x2_t __a, int32x2_t __b)
2059 return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b);
2062 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2063 vsubw_u8 (uint16x8_t __a, uint8x8_t __b)
2065 return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a,
2066 (int8x8_t) __b);
2069 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2070 vsubw_u16 (uint32x4_t __a, uint16x4_t __b)
2072 return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a,
2073 (int16x4_t) __b);
2076 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2077 vsubw_u32 (uint64x2_t __a, uint32x2_t __b)
2079 return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a,
2080 (int32x2_t) __b);
/* vsubw_high_<t>: SSUBW2/USUBW2 — wide subtract using the high half of
   the 128-bit narrow operand __b.  */
2083 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2084 vsubw_high_s8 (int16x8_t __a, int8x16_t __b)
2086 return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b);
2089 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2090 vsubw_high_s16 (int32x4_t __a, int16x8_t __b)
2092 return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b);
2095 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2096 vsubw_high_s32 (int64x2_t __a, int32x4_t __b)
2098 return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b);
2101 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2102 vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b)
2104 return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a,
2105 (int8x16_t) __b);
2108 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2109 vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b)
2111 return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a,
2112 (int16x8_t) __b);
2115 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2116 vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b)
2118 return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a,
2119 (int32x4_t) __b);
/* vqadd(q?)_<t>: saturating add (ACLE SQADD/UQADD) — results clamp to
   the lane type's range instead of wrapping.  The unsigned variants use
   the _uuu-suffixed builtins, which carry unsigned types, so no casts
   are needed.  The 64x1 variants call the scalar DImode builtin on
   lane 0 and rebuild a one-lane vector with a compound literal.  */
2122 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2123 vqadd_s8 (int8x8_t __a, int8x8_t __b)
2125 return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b);
2128 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2129 vqadd_s16 (int16x4_t __a, int16x4_t __b)
2131 return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b);
2134 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2135 vqadd_s32 (int32x2_t __a, int32x2_t __b)
2137 return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b);
2140 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2141 vqadd_s64 (int64x1_t __a, int64x1_t __b)
2143 return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])};
2146 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2147 vqadd_u8 (uint8x8_t __a, uint8x8_t __b)
2149 return __builtin_aarch64_uqaddv8qi_uuu (__a, __b);
2152 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2153 vqadd_u16 (uint16x4_t __a, uint16x4_t __b)
2155 return __builtin_aarch64_uqaddv4hi_uuu (__a, __b);
2158 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2159 vqadd_u32 (uint32x2_t __a, uint32x2_t __b)
2161 return __builtin_aarch64_uqaddv2si_uuu (__a, __b);
2164 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2165 vqadd_u64 (uint64x1_t __a, uint64x1_t __b)
2167 return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])};
2170 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2171 vqaddq_s8 (int8x16_t __a, int8x16_t __b)
2173 return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b);
2176 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2177 vqaddq_s16 (int16x8_t __a, int16x8_t __b)
2179 return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b);
2182 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2183 vqaddq_s32 (int32x4_t __a, int32x4_t __b)
2185 return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b);
2188 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2189 vqaddq_s64 (int64x2_t __a, int64x2_t __b)
2191 return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b);
2194 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2195 vqaddq_u8 (uint8x16_t __a, uint8x16_t __b)
2197 return __builtin_aarch64_uqaddv16qi_uuu (__a, __b);
2200 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2201 vqaddq_u16 (uint16x8_t __a, uint16x8_t __b)
2203 return __builtin_aarch64_uqaddv8hi_uuu (__a, __b);
2206 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2207 vqaddq_u32 (uint32x4_t __a, uint32x4_t __b)
2209 return __builtin_aarch64_uqaddv4si_uuu (__a, __b);
2212 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2213 vqaddq_u64 (uint64x2_t __a, uint64x2_t __b)
2215 return __builtin_aarch64_uqaddv2di_uuu (__a, __b);
/* vqsub(q?)_<t>: saturating subtract (ACLE SQSUB/UQSUB) — results clamp
   to the lane type's range; unsigned results floor at zero.  Same
   structure as vqadd above: _uuu builtins for unsigned lanes, scalar
   DImode builtin plus a compound literal for the 64x1 variants.  */
2218 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
2219 vqsub_s8 (int8x8_t __a, int8x8_t __b)
2221 return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b);
2224 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
2225 vqsub_s16 (int16x4_t __a, int16x4_t __b)
2227 return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, __b);
2230 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
2231 vqsub_s32 (int32x2_t __a, int32x2_t __b)
2233 return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b);
2236 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
2237 vqsub_s64 (int64x1_t __a, int64x1_t __b)
2239 return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])};
2242 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
2243 vqsub_u8 (uint8x8_t __a, uint8x8_t __b)
2245 return __builtin_aarch64_uqsubv8qi_uuu (__a, __b);
2248 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
2249 vqsub_u16 (uint16x4_t __a, uint16x4_t __b)
2251 return __builtin_aarch64_uqsubv4hi_uuu (__a, __b);
2254 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
2255 vqsub_u32 (uint32x2_t __a, uint32x2_t __b)
2257 return __builtin_aarch64_uqsubv2si_uuu (__a, __b);
2260 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
2261 vqsub_u64 (uint64x1_t __a, uint64x1_t __b)
2263 return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])};
2266 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
2267 vqsubq_s8 (int8x16_t __a, int8x16_t __b)
2269 return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b);
2272 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
2273 vqsubq_s16 (int16x8_t __a, int16x8_t __b)
2275 return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b);
2278 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
2279 vqsubq_s32 (int32x4_t __a, int32x4_t __b)
2281 return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b);
2284 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
2285 vqsubq_s64 (int64x2_t __a, int64x2_t __b)
2287 return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b);
2290 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
2291 vqsubq_u8 (uint8x16_t __a, uint8x16_t __b)
2293 return __builtin_aarch64_uqsubv16qi_uuu (__a, __b);
2296 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
2297 vqsubq_u16 (uint16x8_t __a, uint16x8_t __b)
2299 return __builtin_aarch64_uqsubv8hi_uuu (__a, __b);
2302 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
2303 vqsubq_u32 (uint32x4_t __a, uint32x4_t __b)
2305 return __builtin_aarch64_uqsubv4si_uuu (__a, __b);
2308 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
2309 vqsubq_u64 (uint64x2_t __a, uint64x2_t __b)
2311 return __builtin_aarch64_uqsubv2di_uuu (__a, __b);
/* vqneg(q?)_s{8,16,32,64}: lane-wise saturating negate (SQNEG).  Signed
   only.  The 64x1 variant uses the scalar DImode builtin on lane 0.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqneg_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqneg_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqneg_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqnegv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqneg_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqnegq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqnegq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqnegq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqnegv4si (__a);
}
/* vqabs(q?)_s{8,16,32,64}: lane-wise saturating absolute value (SQABS).
   Signed only.  The 64x1 variant uses the scalar DImode builtin on
   lane 0.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqabs_s8 (int8x8_t __a)
{
  return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqabs_s16 (int16x4_t __a)
{
  return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqabs_s32 (int32x2_t __a)
{
  return (int32x2_t) __builtin_aarch64_sqabsv2si (__a);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqabs_s64 (int64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqabsq_s8 (int8x16_t __a)
{
  return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqabsq_s16 (int16x8_t __a)
{
  return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqabsq_s32 (int32x4_t __a)
{
  return (int32x4_t) __builtin_aarch64_sqabsv4si (__a);
}
/* vqdmulh(q?)_s{16,32}: saturating doubling multiply, returning the high
   half of each lane product (SQDMULH).  Only 16- and 32-bit signed lanes
   exist in the instruction set.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b);
}
/* vqrdmulh(q?)_s{16,32}: saturating rounding doubling multiply returning
   the high half of each lane product (SQRDMULH).  Same shape set as
   vqdmulh, with rounding applied before the high half is taken.  */

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmulh_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmulh_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b);
}
/* vcreate_*: build a 64-bit NEON vector from a uint64_t bit pattern.
   Most types are produced with a plain vector reinterpret-cast of the
   scalar; the one-lane 64-bit types use a brace initializer, and f64
   goes through the createv1df builtin.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vcreate_s8 (uint64_t __a)
{
  return (int8x8_t) __a;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vcreate_s16 (uint64_t __a)
{
  return (int16x4_t) __a;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcreate_s32 (uint64_t __a)
{
  return (int32x2_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vcreate_s64 (uint64_t __a)
{
  return (int64x1_t) {__a};
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vcreate_f32 (uint64_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcreate_u8 (uint64_t __a)
{
  return (uint8x8_t) __a;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcreate_u16 (uint64_t __a)
{
  return (uint16x4_t) __a;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcreate_u32 (uint64_t __a)
{
  return (uint32x2_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vcreate_u64 (uint64_t __a)
{
  return (uint64x1_t) {__a};
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vcreate_f64 (uint64_t __a)
{
  return __builtin_aarch64_createv1df (__a);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vcreate_p8 (uint64_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vcreate_p16 (uint64_t __a)
{
  return (poly16x4_t) __a;
}
/* vget_lane */
/* vget_lane_*: extract lane __b from a 64-bit vector.  All variants
   forward to the corresponding __aarch64_vget_lane_* helper macro;
   __b must be a constant lane index valid for the vector's lane
   count.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vget_lane_f32 (float32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_f32 (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vget_lane_f64 (float64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_f64 (__a, __b);
}

__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vget_lane_p8 (poly8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_p8 (__a, __b);
}

__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vget_lane_p16 (poly16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_p16 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vget_lane_s8 (int8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_s8 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vget_lane_s16 (int16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_s16 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vget_lane_s32 (int32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_s32 (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vget_lane_s64 (int64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_s64 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vget_lane_u8 (uint8x8_t __a, const int __b)
{
  return __aarch64_vget_lane_u8 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vget_lane_u16 (uint16x4_t __a, const int __b)
{
  return __aarch64_vget_lane_u16 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vget_lane_u32 (uint32x2_t __a, const int __b)
{
  return __aarch64_vget_lane_u32 (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vget_lane_u64 (uint64x1_t __a, const int __b)
{
  return __aarch64_vget_lane_u64 (__a, __b);
}
/* vgetq_lane */
/* vgetq_lane_*: extract lane __b from a 128-bit vector; same contract as
   vget_lane_* but with the q-register lane counts.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vgetq_lane_f32 (float32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f32 (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vgetq_lane_f64 (float64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_f64 (__a, __b);
}

__extension__ static __inline poly8_t __attribute__ ((__always_inline__))
vgetq_lane_p8 (poly8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p8 (__a, __b);
}

__extension__ static __inline poly16_t __attribute__ ((__always_inline__))
vgetq_lane_p16 (poly16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_p16 (__a, __b);
}

__extension__ static __inline int8_t __attribute__ ((__always_inline__))
vgetq_lane_s8 (int8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s8 (__a, __b);
}

__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vgetq_lane_s16 (int16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s16 (__a, __b);
}

__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vgetq_lane_s32 (int32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s32 (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vgetq_lane_s64 (int64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_s64 (__a, __b);
}

__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
vgetq_lane_u8 (uint8x16_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u8 (__a, __b);
}

__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
vgetq_lane_u16 (uint16x8_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u16 (__a, __b);
}

__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
vgetq_lane_u32 (uint32x4_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u32 (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u64 (__a, __b);
}
/* vreinterpret */
/* vreinterpret_p8_*: bit-preserving reinterpretation of a 64-bit vector
   as poly8x8_t.  Plain vector casts, except the f64 source which must go
   through a builtin because float64x1_t is V1DF mode.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f64 (float64x1_t __a)
{
  /* _ps suffix: poly result from signed-typed builtin operand.  */
  return __builtin_aarch64_reinterpretv8qiv1df_ps (__a);
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s8 (int8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s16 (int16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s32 (int32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_s64 (int64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_f32 (float32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u8 (uint8x8_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u16 (uint16x4_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u32 (uint32x2_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_u64 (uint64x1_t __a)
{
  return (poly8x8_t) __a;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vreinterpret_p8_p16 (poly16x4_t __a)
{
  return (poly8x8_t) __a;
}
/* vreinterpretq_p8_*: bit-preserving reinterpretation of a 128-bit
   vector as poly8x16_t.  All plain vector casts; no V1DF special case
   exists in the q-register forms.  */

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f64 (float64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s8 (int8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s16 (int16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s32 (int32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_s64 (int64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_f32 (float32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u8 (uint8x16_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u16 (uint16x8_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u32 (uint32x4_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_u64 (uint64x2_t __a)
{
  return (poly8x16_t) __a;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vreinterpretq_p8_p16 (poly16x8_t __a)
{
  return (poly8x16_t) __a;
}
/* vreinterpret_p16_*: bit-preserving reinterpretation of a 64-bit vector
   as poly16x4_t.  Plain casts except the V1DF (f64) source.  */

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f64 (float64x1_t __a)
{
  return __builtin_aarch64_reinterpretv4hiv1df_ps (__a);
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s8 (int8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s16 (int16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s32 (int32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_s64 (int64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_f32 (float32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u8 (uint8x8_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u16 (uint16x4_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u32 (uint32x2_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_u64 (uint64x1_t __a)
{
  return (poly16x4_t) __a;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vreinterpret_p16_p8 (poly8x8_t __a)
{
  return (poly16x4_t) __a;
}
/* vreinterpretq_p16_*: bit-preserving reinterpretation of a 128-bit
   vector as poly16x8_t.  All plain vector casts.  */

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f64 (float64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s8 (int8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s16 (int16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s32 (int32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_s64 (int64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_f32 (float32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u8 (uint8x16_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u16 (uint16x8_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u32 (uint32x4_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_u64 (uint64x2_t __a)
{
  return (poly16x8_t) __a;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vreinterpretq_p16_p8 (poly8x16_t __a)
{
  return (poly16x8_t) __a;
}
/* vreinterpret_f32_*: bit-preserving reinterpretation of a 64-bit vector
   as float32x2_t.  Plain casts except the V1DF (f64) source.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_f64 (float64x1_t __a)
{
  return __builtin_aarch64_reinterpretv2sfv1df (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s8 (int8x8_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s16 (int16x4_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s32 (int32x2_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_s64 (int64x1_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u8 (uint8x8_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u16 (uint16x4_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u32 (uint32x2_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_u64 (uint64x1_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p8 (poly8x8_t __a)
{
  return (float32x2_t) __a;
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vreinterpret_f32_p16 (poly16x4_t __a)
{
  return (float32x2_t) __a;
}
/* vreinterpretq_f32_*: bit-preserving reinterpretation of a 128-bit
   vector as float32x4_t.  All plain vector casts.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_f64 (float64x2_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s8 (int8x16_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s16 (int16x8_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s32 (int32x4_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_s64 (int64x2_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u8 (uint8x16_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u16 (uint16x8_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u32 (uint32x4_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_u64 (uint64x2_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p8 (poly8x16_t __a)
{
  return (float32x4_t) __a;
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vreinterpretq_f32_p16 (poly16x8_t __a)
{
  return (float32x4_t) __a;
}
/* vreinterpret_f64_*: bit-preserving reinterpretation of a 64-bit vector
   as float64x1_t.  Because float64x1_t is V1DF mode, every variant must
   go through a builtin rather than a plain cast: reinterpretv1df* for
   multi-lane sources (suffixes: _sp poly operand, _su unsigned operand),
   and createv1df on the extracted lane for the one-lane 64-bit
   sources.  */

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_f32 (float32x2_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_p8 (poly8x8_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv8qi_sp (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_p16 (poly16x4_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv4hi_sp (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s8 (int8x8_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv8qi (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s16 (int16x4_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv4hi (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s32 (int32x2_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv2si (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_s64 (int64x1_t __a)
{
  /* DImode lane is pulled out and rewrapped; the cast to uint64_t only
     changes the builtin's argument type, not the bit pattern.  */
  return __builtin_aarch64_createv1df ((uint64_t) vget_lane_s64 (__a, 0));
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u8 (uint8x8_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv8qi_su (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u16 (uint16x4_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv4hi_su (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u32 (uint32x2_t __a)
{
  return __builtin_aarch64_reinterpretv1dfv2si_su (__a);
}

__extension__ static __inline float64x1_t __attribute__((__always_inline__))
vreinterpret_f64_u64 (uint64x1_t __a)
{
  return __builtin_aarch64_createv1df (vget_lane_u64 (__a, 0));
}
/* vreinterpretq_f64_*: bit-preserving reinterpretation of a 128-bit
   vector as float64x2_t.  All plain vector casts (V2DF needs no builtin
   path, unlike V1DF).  */

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_f32 (float32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_p8 (poly8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_p16 (poly16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s8 (int8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s16 (int16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s32 (int32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_s64 (int64x2_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u8 (uint8x16_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u16 (uint16x8_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u32 (uint32x4_t __a)
{
  return (float64x2_t) __a;
}

__extension__ static __inline float64x2_t __attribute__((__always_inline__))
vreinterpretq_f64_u64 (uint64x2_t __a)
{
  return (float64x2_t) __a;
}
/* vreinterpret_s64_*: bit-preserving reinterpretation of a 64-bit vector
   as int64x1_t.  Plain casts, except the V1DF (f64) source which goes
   through the DImode reinterpret builtin and is rewrapped with a brace
   initializer.  */

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f64 (float64x1_t __a)
{
  return (int64x1_t) {__builtin_aarch64_reinterpretdiv1df (__a)};
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s8 (int8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s16 (int16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_s32 (int32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_f32 (float32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u8 (uint8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u16 (uint16x4_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u32 (uint32x2_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_u64 (uint64x1_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p8 (poly8x8_t __a)
{
  return (int64x1_t) __a;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vreinterpret_s64_p16 (poly16x4_t __a)
{
  return (int64x1_t) __a;
}
/* vreinterpretq_s64_*: bit-preserving reinterpretation of a 128-bit
   vector as int64x2_t.  All plain vector casts.  */

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f64 (float64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s8 (int8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s16 (int16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_s32 (int32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_f32 (float32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u8 (uint8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u16 (uint16x8_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u32 (uint32x4_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_u64 (uint64x2_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p8 (poly8x16_t __a)
{
  return (int64x2_t) __a;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vreinterpretq_s64_p16 (poly16x8_t __a)
{
  return (int64x2_t) __a;
}
/* vreinterpret_u64_*: bit-preserving reinterpretation of a 64-bit vector
   as uint64x1_t.  Plain casts, except the V1DF (f64) source which uses
   the _us (unsigned-result) DImode reinterpret builtin.  */

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f64 (float64x1_t __a)
{
  return (uint64x1_t) {__builtin_aarch64_reinterpretdiv1df_us (__a)};
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s8 (int8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s16 (int16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s32 (int32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_s64 (int64x1_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_f32 (float32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u8 (uint8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u16 (uint16x4_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_u32 (uint32x2_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p8 (poly8x8_t __a)
{
  return (uint64x1_t) __a;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vreinterpret_u64_p16 (poly16x4_t __a)
{
  return (uint64x1_t) __a;
}
/* vreinterpretq_u64_*: bit-preserving reinterpretation of a 128-bit
   vector as uint64x2_t.  All plain vector casts.  */

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f64 (float64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s8 (int8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s16 (int16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s32 (int32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_s64 (int64x2_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_f32 (float32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u8 (uint8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u16 (uint16x8_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_u32 (uint32x4_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p8 (poly8x16_t __a)
{
  return (uint64x2_t) __a;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vreinterpretq_u64_p16 (poly16x8_t __a)
{
  return (uint64x2_t) __a;
}
3460 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3461 vreinterpret_s8_f64 (float64x1_t __a)
3463 return __builtin_aarch64_reinterpretv8qiv1df (__a);
3466 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3467 vreinterpret_s8_s16 (int16x4_t __a)
3469 return (int8x8_t) __a;
3472 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3473 vreinterpret_s8_s32 (int32x2_t __a)
3475 return (int8x8_t) __a;
3478 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3479 vreinterpret_s8_s64 (int64x1_t __a)
3481 return (int8x8_t) __a;
3484 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3485 vreinterpret_s8_f32 (float32x2_t __a)
3487 return (int8x8_t) __a;
3490 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3491 vreinterpret_s8_u8 (uint8x8_t __a)
3493 return (int8x8_t) __a;
3496 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3497 vreinterpret_s8_u16 (uint16x4_t __a)
3499 return (int8x8_t) __a;
3502 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3503 vreinterpret_s8_u32 (uint32x2_t __a)
3505 return (int8x8_t) __a;
3508 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3509 vreinterpret_s8_u64 (uint64x1_t __a)
3511 return (int8x8_t) __a;
3514 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3515 vreinterpret_s8_p8 (poly8x8_t __a)
3517 return (int8x8_t) __a;
3520 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
3521 vreinterpret_s8_p16 (poly16x4_t __a)
3523 return (int8x8_t) __a;
3526 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3527 vreinterpretq_s8_f64 (float64x2_t __a)
3529 return (int8x16_t) __a;
3532 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3533 vreinterpretq_s8_s16 (int16x8_t __a)
3535 return (int8x16_t) __a;
3538 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3539 vreinterpretq_s8_s32 (int32x4_t __a)
3541 return (int8x16_t) __a;
3544 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3545 vreinterpretq_s8_s64 (int64x2_t __a)
3547 return (int8x16_t) __a;
3550 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3551 vreinterpretq_s8_f32 (float32x4_t __a)
3553 return (int8x16_t) __a;
3556 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3557 vreinterpretq_s8_u8 (uint8x16_t __a)
3559 return (int8x16_t) __a;
3562 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3563 vreinterpretq_s8_u16 (uint16x8_t __a)
3565 return (int8x16_t) __a;
3568 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3569 vreinterpretq_s8_u32 (uint32x4_t __a)
3571 return (int8x16_t) __a;
3574 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3575 vreinterpretq_s8_u64 (uint64x2_t __a)
3577 return (int8x16_t) __a;
3580 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3581 vreinterpretq_s8_p8 (poly8x16_t __a)
3583 return (int8x16_t) __a;
3586 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
3587 vreinterpretq_s8_p16 (poly16x8_t __a)
3589 return (int8x16_t) __a;
3592 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3593 vreinterpret_s16_f64 (float64x1_t __a)
3595 return __builtin_aarch64_reinterpretv4hiv1df (__a);
3598 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3599 vreinterpret_s16_s8 (int8x8_t __a)
3601 return (int16x4_t) __a;
3604 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3605 vreinterpret_s16_s32 (int32x2_t __a)
3607 return (int16x4_t) __a;
3610 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3611 vreinterpret_s16_s64 (int64x1_t __a)
3613 return (int16x4_t) __a;
3616 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3617 vreinterpret_s16_f32 (float32x2_t __a)
3619 return (int16x4_t) __a;
3622 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3623 vreinterpret_s16_u8 (uint8x8_t __a)
3625 return (int16x4_t) __a;
3628 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3629 vreinterpret_s16_u16 (uint16x4_t __a)
3631 return (int16x4_t) __a;
3634 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3635 vreinterpret_s16_u32 (uint32x2_t __a)
3637 return (int16x4_t) __a;
3640 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3641 vreinterpret_s16_u64 (uint64x1_t __a)
3643 return (int16x4_t) __a;
3646 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3647 vreinterpret_s16_p8 (poly8x8_t __a)
3649 return (int16x4_t) __a;
3652 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
3653 vreinterpret_s16_p16 (poly16x4_t __a)
3655 return (int16x4_t) __a;
3658 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3659 vreinterpretq_s16_f64 (float64x2_t __a)
3661 return (int16x8_t) __a;
3664 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3665 vreinterpretq_s16_s8 (int8x16_t __a)
3667 return (int16x8_t) __a;
3670 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3671 vreinterpretq_s16_s32 (int32x4_t __a)
3673 return (int16x8_t) __a;
3676 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3677 vreinterpretq_s16_s64 (int64x2_t __a)
3679 return (int16x8_t) __a;
3682 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3683 vreinterpretq_s16_f32 (float32x4_t __a)
3685 return (int16x8_t) __a;
3688 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3689 vreinterpretq_s16_u8 (uint8x16_t __a)
3691 return (int16x8_t) __a;
3694 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3695 vreinterpretq_s16_u16 (uint16x8_t __a)
3697 return (int16x8_t) __a;
3700 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3701 vreinterpretq_s16_u32 (uint32x4_t __a)
3703 return (int16x8_t) __a;
3706 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3707 vreinterpretq_s16_u64 (uint64x2_t __a)
3709 return (int16x8_t) __a;
3712 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3713 vreinterpretq_s16_p8 (poly8x16_t __a)
3715 return (int16x8_t) __a;
3718 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
3719 vreinterpretq_s16_p16 (poly16x8_t __a)
3721 return (int16x8_t) __a;
3724 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3725 vreinterpret_s32_f64 (float64x1_t __a)
3727 return __builtin_aarch64_reinterpretv2siv1df (__a);
3730 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3731 vreinterpret_s32_s8 (int8x8_t __a)
3733 return (int32x2_t) __a;
3736 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3737 vreinterpret_s32_s16 (int16x4_t __a)
3739 return (int32x2_t) __a;
3742 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3743 vreinterpret_s32_s64 (int64x1_t __a)
3745 return (int32x2_t) __a;
3748 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3749 vreinterpret_s32_f32 (float32x2_t __a)
3751 return (int32x2_t) __a;
3754 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3755 vreinterpret_s32_u8 (uint8x8_t __a)
3757 return (int32x2_t) __a;
3760 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3761 vreinterpret_s32_u16 (uint16x4_t __a)
3763 return (int32x2_t) __a;
3766 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3767 vreinterpret_s32_u32 (uint32x2_t __a)
3769 return (int32x2_t) __a;
3772 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3773 vreinterpret_s32_u64 (uint64x1_t __a)
3775 return (int32x2_t) __a;
3778 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3779 vreinterpret_s32_p8 (poly8x8_t __a)
3781 return (int32x2_t) __a;
3784 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
3785 vreinterpret_s32_p16 (poly16x4_t __a)
3787 return (int32x2_t) __a;
3790 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3791 vreinterpretq_s32_f64 (float64x2_t __a)
3793 return (int32x4_t) __a;
3796 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3797 vreinterpretq_s32_s8 (int8x16_t __a)
3799 return (int32x4_t) __a;
3802 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3803 vreinterpretq_s32_s16 (int16x8_t __a)
3805 return (int32x4_t) __a;
3808 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3809 vreinterpretq_s32_s64 (int64x2_t __a)
3811 return (int32x4_t) __a;
3814 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3815 vreinterpretq_s32_f32 (float32x4_t __a)
3817 return (int32x4_t) __a;
3820 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3821 vreinterpretq_s32_u8 (uint8x16_t __a)
3823 return (int32x4_t) __a;
3826 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3827 vreinterpretq_s32_u16 (uint16x8_t __a)
3829 return (int32x4_t) __a;
3832 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3833 vreinterpretq_s32_u32 (uint32x4_t __a)
3835 return (int32x4_t) __a;
3838 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3839 vreinterpretq_s32_u64 (uint64x2_t __a)
3841 return (int32x4_t) __a;
3844 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3845 vreinterpretq_s32_p8 (poly8x16_t __a)
3847 return (int32x4_t) __a;
3850 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
3851 vreinterpretq_s32_p16 (poly16x8_t __a)
3853 return (int32x4_t) __a;
3856 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3857 vreinterpret_u8_f64 (float64x1_t __a)
3859 return __builtin_aarch64_reinterpretv8qiv1df_us (__a);
3862 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3863 vreinterpret_u8_s8 (int8x8_t __a)
3865 return (uint8x8_t) __a;
3868 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3869 vreinterpret_u8_s16 (int16x4_t __a)
3871 return (uint8x8_t) __a;
3874 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3875 vreinterpret_u8_s32 (int32x2_t __a)
3877 return (uint8x8_t) __a;
3880 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3881 vreinterpret_u8_s64 (int64x1_t __a)
3883 return (uint8x8_t) __a;
3886 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3887 vreinterpret_u8_f32 (float32x2_t __a)
3889 return (uint8x8_t) __a;
3892 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3893 vreinterpret_u8_u16 (uint16x4_t __a)
3895 return (uint8x8_t) __a;
3898 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3899 vreinterpret_u8_u32 (uint32x2_t __a)
3901 return (uint8x8_t) __a;
3904 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3905 vreinterpret_u8_u64 (uint64x1_t __a)
3907 return (uint8x8_t) __a;
3910 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3911 vreinterpret_u8_p8 (poly8x8_t __a)
3913 return (uint8x8_t) __a;
3916 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
3917 vreinterpret_u8_p16 (poly16x4_t __a)
3919 return (uint8x8_t) __a;
3922 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3923 vreinterpretq_u8_f64 (float64x2_t __a)
3925 return (uint8x16_t) __a;
3928 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3929 vreinterpretq_u8_s8 (int8x16_t __a)
3931 return (uint8x16_t) __a;
3934 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3935 vreinterpretq_u8_s16 (int16x8_t __a)
3937 return (uint8x16_t) __a;
3940 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3941 vreinterpretq_u8_s32 (int32x4_t __a)
3943 return (uint8x16_t) __a;
3946 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3947 vreinterpretq_u8_s64 (int64x2_t __a)
3949 return (uint8x16_t) __a;
3952 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3953 vreinterpretq_u8_f32 (float32x4_t __a)
3955 return (uint8x16_t) __a;
3958 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3959 vreinterpretq_u8_u16 (uint16x8_t __a)
3961 return (uint8x16_t) __a;
3964 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3965 vreinterpretq_u8_u32 (uint32x4_t __a)
3967 return (uint8x16_t) __a;
3970 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3971 vreinterpretq_u8_u64 (uint64x2_t __a)
3973 return (uint8x16_t) __a;
3976 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3977 vreinterpretq_u8_p8 (poly8x16_t __a)
3979 return (uint8x16_t) __a;
3982 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
3983 vreinterpretq_u8_p16 (poly16x8_t __a)
3985 return (uint8x16_t) __a;
3988 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3989 vreinterpret_u16_f64 (float64x1_t __a)
3991 return __builtin_aarch64_reinterpretv4hiv1df_us (__a);
3994 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
3995 vreinterpret_u16_s8 (int8x8_t __a)
3997 return (uint16x4_t) __a;
4000 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4001 vreinterpret_u16_s16 (int16x4_t __a)
4003 return (uint16x4_t) __a;
4006 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4007 vreinterpret_u16_s32 (int32x2_t __a)
4009 return (uint16x4_t) __a;
4012 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4013 vreinterpret_u16_s64 (int64x1_t __a)
4015 return (uint16x4_t) __a;
4018 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4019 vreinterpret_u16_f32 (float32x2_t __a)
4021 return (uint16x4_t) __a;
4024 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4025 vreinterpret_u16_u8 (uint8x8_t __a)
4027 return (uint16x4_t) __a;
4030 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4031 vreinterpret_u16_u32 (uint32x2_t __a)
4033 return (uint16x4_t) __a;
4036 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4037 vreinterpret_u16_u64 (uint64x1_t __a)
4039 return (uint16x4_t) __a;
4042 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4043 vreinterpret_u16_p8 (poly8x8_t __a)
4045 return (uint16x4_t) __a;
4048 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4049 vreinterpret_u16_p16 (poly16x4_t __a)
4051 return (uint16x4_t) __a;
4054 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4055 vreinterpretq_u16_f64 (float64x2_t __a)
4057 return (uint16x8_t) __a;
4060 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4061 vreinterpretq_u16_s8 (int8x16_t __a)
4063 return (uint16x8_t) __a;
4066 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4067 vreinterpretq_u16_s16 (int16x8_t __a)
4069 return (uint16x8_t) __a;
4072 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4073 vreinterpretq_u16_s32 (int32x4_t __a)
4075 return (uint16x8_t) __a;
4078 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4079 vreinterpretq_u16_s64 (int64x2_t __a)
4081 return (uint16x8_t) __a;
4084 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4085 vreinterpretq_u16_f32 (float32x4_t __a)
4087 return (uint16x8_t) __a;
4090 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4091 vreinterpretq_u16_u8 (uint8x16_t __a)
4093 return (uint16x8_t) __a;
4096 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4097 vreinterpretq_u16_u32 (uint32x4_t __a)
4099 return (uint16x8_t) __a;
4102 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4103 vreinterpretq_u16_u64 (uint64x2_t __a)
4105 return (uint16x8_t) __a;
4108 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4109 vreinterpretq_u16_p8 (poly8x16_t __a)
4111 return (uint16x8_t) __a;
4114 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4115 vreinterpretq_u16_p16 (poly16x8_t __a)
4117 return (uint16x8_t) __a;
4120 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4121 vreinterpret_u32_f64 (float64x1_t __a)
4123 return __builtin_aarch64_reinterpretv2siv1df_us (__a);
4126 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4127 vreinterpret_u32_s8 (int8x8_t __a)
4129 return (uint32x2_t) __a;
4132 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4133 vreinterpret_u32_s16 (int16x4_t __a)
4135 return (uint32x2_t) __a;
4138 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4139 vreinterpret_u32_s32 (int32x2_t __a)
4141 return (uint32x2_t) __a;
4144 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4145 vreinterpret_u32_s64 (int64x1_t __a)
4147 return (uint32x2_t) __a;
4150 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4151 vreinterpret_u32_f32 (float32x2_t __a)
4153 return (uint32x2_t) __a;
4156 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4157 vreinterpret_u32_u8 (uint8x8_t __a)
4159 return (uint32x2_t) __a;
4162 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4163 vreinterpret_u32_u16 (uint16x4_t __a)
4165 return (uint32x2_t) __a;
4168 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4169 vreinterpret_u32_u64 (uint64x1_t __a)
4171 return (uint32x2_t) __a;
4174 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4175 vreinterpret_u32_p8 (poly8x8_t __a)
4177 return (uint32x2_t) __a;
4180 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4181 vreinterpret_u32_p16 (poly16x4_t __a)
4183 return (uint32x2_t) __a;
4186 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4187 vreinterpretq_u32_f64 (float64x2_t __a)
4189 return (uint32x4_t) __a;
4192 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4193 vreinterpretq_u32_s8 (int8x16_t __a)
4195 return (uint32x4_t) __a;
4198 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4199 vreinterpretq_u32_s16 (int16x8_t __a)
4201 return (uint32x4_t) __a;
4204 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4205 vreinterpretq_u32_s32 (int32x4_t __a)
4207 return (uint32x4_t) __a;
4210 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4211 vreinterpretq_u32_s64 (int64x2_t __a)
4213 return (uint32x4_t) __a;
4216 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4217 vreinterpretq_u32_f32 (float32x4_t __a)
4219 return (uint32x4_t) __a;
4222 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4223 vreinterpretq_u32_u8 (uint8x16_t __a)
4225 return (uint32x4_t) __a;
4228 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4229 vreinterpretq_u32_u16 (uint16x8_t __a)
4231 return (uint32x4_t) __a;
4234 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4235 vreinterpretq_u32_u64 (uint64x2_t __a)
4237 return (uint32x4_t) __a;
4240 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4241 vreinterpretq_u32_p8 (poly8x16_t __a)
4243 return (uint32x4_t) __a;
4246 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4247 vreinterpretq_u32_p16 (poly16x8_t __a)
4249 return (uint32x4_t) __a;
/* Helper used by the vget_low_* intrinsics below: reinterpret the
   128-bit argument as two 64-bit lanes, extract lane 0, and relabel it
   back to the requested element type.  Expands to the whole function
   body, including the return statement.  #undef'd after use.  */
#define __GET_LOW(__TYPE) \
  uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a);  \
  uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0));  \
  return vreinterpret_##__TYPE##_u64 (lo);
4257 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4258 vget_low_f32 (float32x4_t __a)
4260 __GET_LOW (f32);
4263 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
4264 vget_low_f64 (float64x2_t __a)
4266 return (float64x1_t) {vgetq_lane_f64 (__a, 0)};
4269 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
4270 vget_low_p8 (poly8x16_t __a)
4272 __GET_LOW (p8);
4275 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
4276 vget_low_p16 (poly16x8_t __a)
4278 __GET_LOW (p16);
4281 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4282 vget_low_s8 (int8x16_t __a)
4284 __GET_LOW (s8);
4287 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4288 vget_low_s16 (int16x8_t __a)
4290 __GET_LOW (s16);
4293 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4294 vget_low_s32 (int32x4_t __a)
4296 __GET_LOW (s32);
4299 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
4300 vget_low_s64 (int64x2_t __a)
4302 __GET_LOW (s64);
4305 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4306 vget_low_u8 (uint8x16_t __a)
4308 __GET_LOW (u8);
4311 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4312 vget_low_u16 (uint16x8_t __a)
4314 __GET_LOW (u16);
4317 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4318 vget_low_u32 (uint32x4_t __a)
4320 __GET_LOW (u32);
4323 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
4324 vget_low_u64 (uint64x2_t __a)
4326 return vcreate_u64 (vgetq_lane_u64 (__a, 0));
4329 #undef __GET_LOW
4331 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4332 vcombine_s8 (int8x8_t __a, int8x8_t __b)
4334 return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b);
4337 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4338 vcombine_s16 (int16x4_t __a, int16x4_t __b)
4340 return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b);
4343 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4344 vcombine_s32 (int32x2_t __a, int32x2_t __b)
4346 return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b);
4349 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4350 vcombine_s64 (int64x1_t __a, int64x1_t __b)
4352 return __builtin_aarch64_combinedi (__a[0], __b[0]);
4355 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4356 vcombine_f32 (float32x2_t __a, float32x2_t __b)
4358 return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b);
4361 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4362 vcombine_u8 (uint8x8_t __a, uint8x8_t __b)
4364 return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4365 (int8x8_t) __b);
4368 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4369 vcombine_u16 (uint16x4_t __a, uint16x4_t __b)
4371 return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4372 (int16x4_t) __b);
4375 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4376 vcombine_u32 (uint32x2_t __a, uint32x2_t __b)
4378 return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a,
4379 (int32x2_t) __b);
4382 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4383 vcombine_u64 (uint64x1_t __a, uint64x1_t __b)
4385 return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]);
4388 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4389 vcombine_f64 (float64x1_t __a, float64x1_t __b)
4391 return __builtin_aarch64_combinedf (__a[0], __b[0]);
4394 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
4395 vcombine_p8 (poly8x8_t __a, poly8x8_t __b)
4397 return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a,
4398 (int8x8_t) __b);
4401 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
4402 vcombine_p16 (poly16x4_t __a, poly16x4_t __b)
4404 return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a,
4405 (int16x4_t) __b);
4408 /* Start of temporary inline asm implementations. */
4410 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4411 vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
4413 int8x8_t result;
4414 __asm__ ("saba %0.8b,%2.8b,%3.8b"
4415 : "=w"(result)
4416 : "0"(a), "w"(b), "w"(c)
4417 : /* No clobbers */);
4418 return result;
4421 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4422 vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
4424 int16x4_t result;
4425 __asm__ ("saba %0.4h,%2.4h,%3.4h"
4426 : "=w"(result)
4427 : "0"(a), "w"(b), "w"(c)
4428 : /* No clobbers */);
4429 return result;
4432 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4433 vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
4435 int32x2_t result;
4436 __asm__ ("saba %0.2s,%2.2s,%3.2s"
4437 : "=w"(result)
4438 : "0"(a), "w"(b), "w"(c)
4439 : /* No clobbers */);
4440 return result;
4443 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4444 vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
4446 uint8x8_t result;
4447 __asm__ ("uaba %0.8b,%2.8b,%3.8b"
4448 : "=w"(result)
4449 : "0"(a), "w"(b), "w"(c)
4450 : /* No clobbers */);
4451 return result;
4454 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4455 vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
4457 uint16x4_t result;
4458 __asm__ ("uaba %0.4h,%2.4h,%3.4h"
4459 : "=w"(result)
4460 : "0"(a), "w"(b), "w"(c)
4461 : /* No clobbers */);
4462 return result;
4465 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4466 vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
4468 uint32x2_t result;
4469 __asm__ ("uaba %0.2s,%2.2s,%3.2s"
4470 : "=w"(result)
4471 : "0"(a), "w"(b), "w"(c)
4472 : /* No clobbers */);
4473 return result;
4476 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4477 vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
4479 int16x8_t result;
4480 __asm__ ("sabal2 %0.8h,%2.16b,%3.16b"
4481 : "=w"(result)
4482 : "0"(a), "w"(b), "w"(c)
4483 : /* No clobbers */);
4484 return result;
4487 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4488 vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
4490 int32x4_t result;
4491 __asm__ ("sabal2 %0.4s,%2.8h,%3.8h"
4492 : "=w"(result)
4493 : "0"(a), "w"(b), "w"(c)
4494 : /* No clobbers */);
4495 return result;
4498 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4499 vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
4501 int64x2_t result;
4502 __asm__ ("sabal2 %0.2d,%2.4s,%3.4s"
4503 : "=w"(result)
4504 : "0"(a), "w"(b), "w"(c)
4505 : /* No clobbers */);
4506 return result;
4509 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4510 vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
4512 uint16x8_t result;
4513 __asm__ ("uabal2 %0.8h,%2.16b,%3.16b"
4514 : "=w"(result)
4515 : "0"(a), "w"(b), "w"(c)
4516 : /* No clobbers */);
4517 return result;
4520 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4521 vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
4523 uint32x4_t result;
4524 __asm__ ("uabal2 %0.4s,%2.8h,%3.8h"
4525 : "=w"(result)
4526 : "0"(a), "w"(b), "w"(c)
4527 : /* No clobbers */);
4528 return result;
4531 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4532 vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
4534 uint64x2_t result;
4535 __asm__ ("uabal2 %0.2d,%2.4s,%3.4s"
4536 : "=w"(result)
4537 : "0"(a), "w"(b), "w"(c)
4538 : /* No clobbers */);
4539 return result;
4542 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4543 vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
4545 int16x8_t result;
4546 __asm__ ("sabal %0.8h,%2.8b,%3.8b"
4547 : "=w"(result)
4548 : "0"(a), "w"(b), "w"(c)
4549 : /* No clobbers */);
4550 return result;
4553 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4554 vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
4556 int32x4_t result;
4557 __asm__ ("sabal %0.4s,%2.4h,%3.4h"
4558 : "=w"(result)
4559 : "0"(a), "w"(b), "w"(c)
4560 : /* No clobbers */);
4561 return result;
4564 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4565 vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
4567 int64x2_t result;
4568 __asm__ ("sabal %0.2d,%2.2s,%3.2s"
4569 : "=w"(result)
4570 : "0"(a), "w"(b), "w"(c)
4571 : /* No clobbers */);
4572 return result;
4575 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4576 vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
4578 uint16x8_t result;
4579 __asm__ ("uabal %0.8h,%2.8b,%3.8b"
4580 : "=w"(result)
4581 : "0"(a), "w"(b), "w"(c)
4582 : /* No clobbers */);
4583 return result;
4586 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4587 vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
4589 uint32x4_t result;
4590 __asm__ ("uabal %0.4s,%2.4h,%3.4h"
4591 : "=w"(result)
4592 : "0"(a), "w"(b), "w"(c)
4593 : /* No clobbers */);
4594 return result;
4597 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4598 vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
4600 uint64x2_t result;
4601 __asm__ ("uabal %0.2d,%2.2s,%3.2s"
4602 : "=w"(result)
4603 : "0"(a), "w"(b), "w"(c)
4604 : /* No clobbers */);
4605 return result;
4608 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4609 vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
4611 int8x16_t result;
4612 __asm__ ("saba %0.16b,%2.16b,%3.16b"
4613 : "=w"(result)
4614 : "0"(a), "w"(b), "w"(c)
4615 : /* No clobbers */);
4616 return result;
4619 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4620 vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
4622 int16x8_t result;
4623 __asm__ ("saba %0.8h,%2.8h,%3.8h"
4624 : "=w"(result)
4625 : "0"(a), "w"(b), "w"(c)
4626 : /* No clobbers */);
4627 return result;
4630 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4631 vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
4633 int32x4_t result;
4634 __asm__ ("saba %0.4s,%2.4s,%3.4s"
4635 : "=w"(result)
4636 : "0"(a), "w"(b), "w"(c)
4637 : /* No clobbers */);
4638 return result;
4641 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4642 vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
4644 uint8x16_t result;
4645 __asm__ ("uaba %0.16b,%2.16b,%3.16b"
4646 : "=w"(result)
4647 : "0"(a), "w"(b), "w"(c)
4648 : /* No clobbers */);
4649 return result;
4652 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4653 vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
4655 uint16x8_t result;
4656 __asm__ ("uaba %0.8h,%2.8h,%3.8h"
4657 : "=w"(result)
4658 : "0"(a), "w"(b), "w"(c)
4659 : /* No clobbers */);
4660 return result;
4663 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4664 vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
4666 uint32x4_t result;
4667 __asm__ ("uaba %0.4s,%2.4s,%3.4s"
4668 : "=w"(result)
4669 : "0"(a), "w"(b), "w"(c)
4670 : /* No clobbers */);
4671 return result;
4674 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
4675 vabd_f32 (float32x2_t a, float32x2_t b)
4677 float32x2_t result;
4678 __asm__ ("fabd %0.2s, %1.2s, %2.2s"
4679 : "=w"(result)
4680 : "w"(a), "w"(b)
4681 : /* No clobbers */);
4682 return result;
4685 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
4686 vabd_s8 (int8x8_t a, int8x8_t b)
4688 int8x8_t result;
4689 __asm__ ("sabd %0.8b, %1.8b, %2.8b"
4690 : "=w"(result)
4691 : "w"(a), "w"(b)
4692 : /* No clobbers */);
4693 return result;
4696 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
4697 vabd_s16 (int16x4_t a, int16x4_t b)
4699 int16x4_t result;
4700 __asm__ ("sabd %0.4h, %1.4h, %2.4h"
4701 : "=w"(result)
4702 : "w"(a), "w"(b)
4703 : /* No clobbers */);
4704 return result;
4707 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
4708 vabd_s32 (int32x2_t a, int32x2_t b)
4710 int32x2_t result;
4711 __asm__ ("sabd %0.2s, %1.2s, %2.2s"
4712 : "=w"(result)
4713 : "w"(a), "w"(b)
4714 : /* No clobbers */);
4715 return result;
4718 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
4719 vabd_u8 (uint8x8_t a, uint8x8_t b)
4721 uint8x8_t result;
4722 __asm__ ("uabd %0.8b, %1.8b, %2.8b"
4723 : "=w"(result)
4724 : "w"(a), "w"(b)
4725 : /* No clobbers */);
4726 return result;
4729 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
4730 vabd_u16 (uint16x4_t a, uint16x4_t b)
4732 uint16x4_t result;
4733 __asm__ ("uabd %0.4h, %1.4h, %2.4h"
4734 : "=w"(result)
4735 : "w"(a), "w"(b)
4736 : /* No clobbers */);
4737 return result;
4740 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
4741 vabd_u32 (uint32x2_t a, uint32x2_t b)
4743 uint32x2_t result;
4744 __asm__ ("uabd %0.2s, %1.2s, %2.2s"
4745 : "=w"(result)
4746 : "w"(a), "w"(b)
4747 : /* No clobbers */);
4748 return result;
4751 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
4752 vabdd_f64 (float64_t a, float64_t b)
4754 float64_t result;
4755 __asm__ ("fabd %d0, %d1, %d2"
4756 : "=w"(result)
4757 : "w"(a), "w"(b)
4758 : /* No clobbers */);
4759 return result;
4762 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4763 vabdl_high_s8 (int8x16_t a, int8x16_t b)
4765 int16x8_t result;
4766 __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b"
4767 : "=w"(result)
4768 : "w"(a), "w"(b)
4769 : /* No clobbers */);
4770 return result;
4773 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4774 vabdl_high_s16 (int16x8_t a, int16x8_t b)
4776 int32x4_t result;
4777 __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h"
4778 : "=w"(result)
4779 : "w"(a), "w"(b)
4780 : /* No clobbers */);
4781 return result;
4784 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4785 vabdl_high_s32 (int32x4_t a, int32x4_t b)
4787 int64x2_t result;
4788 __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s"
4789 : "=w"(result)
4790 : "w"(a), "w"(b)
4791 : /* No clobbers */);
4792 return result;
4795 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4796 vabdl_high_u8 (uint8x16_t a, uint8x16_t b)
4798 uint16x8_t result;
4799 __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b"
4800 : "=w"(result)
4801 : "w"(a), "w"(b)
4802 : /* No clobbers */);
4803 return result;
4806 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4807 vabdl_high_u16 (uint16x8_t a, uint16x8_t b)
4809 uint32x4_t result;
4810 __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h"
4811 : "=w"(result)
4812 : "w"(a), "w"(b)
4813 : /* No clobbers */);
4814 return result;
4817 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4818 vabdl_high_u32 (uint32x4_t a, uint32x4_t b)
4820 uint64x2_t result;
4821 __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s"
4822 : "=w"(result)
4823 : "w"(a), "w"(b)
4824 : /* No clobbers */);
4825 return result;
4828 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4829 vabdl_s8 (int8x8_t a, int8x8_t b)
4831 int16x8_t result;
4832 __asm__ ("sabdl %0.8h, %1.8b, %2.8b"
4833 : "=w"(result)
4834 : "w"(a), "w"(b)
4835 : /* No clobbers */);
4836 return result;
4839 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4840 vabdl_s16 (int16x4_t a, int16x4_t b)
4842 int32x4_t result;
4843 __asm__ ("sabdl %0.4s, %1.4h, %2.4h"
4844 : "=w"(result)
4845 : "w"(a), "w"(b)
4846 : /* No clobbers */);
4847 return result;
4850 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
4851 vabdl_s32 (int32x2_t a, int32x2_t b)
4853 int64x2_t result;
4854 __asm__ ("sabdl %0.2d, %1.2s, %2.2s"
4855 : "=w"(result)
4856 : "w"(a), "w"(b)
4857 : /* No clobbers */);
4858 return result;
4861 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4862 vabdl_u8 (uint8x8_t a, uint8x8_t b)
4864 uint16x8_t result;
4865 __asm__ ("uabdl %0.8h, %1.8b, %2.8b"
4866 : "=w"(result)
4867 : "w"(a), "w"(b)
4868 : /* No clobbers */);
4869 return result;
4872 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4873 vabdl_u16 (uint16x4_t a, uint16x4_t b)
4875 uint32x4_t result;
4876 __asm__ ("uabdl %0.4s, %1.4h, %2.4h"
4877 : "=w"(result)
4878 : "w"(a), "w"(b)
4879 : /* No clobbers */);
4880 return result;
4883 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
4884 vabdl_u32 (uint32x2_t a, uint32x2_t b)
4886 uint64x2_t result;
4887 __asm__ ("uabdl %0.2d, %1.2s, %2.2s"
4888 : "=w"(result)
4889 : "w"(a), "w"(b)
4890 : /* No clobbers */);
4891 return result;
4894 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
4895 vabdq_f32 (float32x4_t a, float32x4_t b)
4897 float32x4_t result;
4898 __asm__ ("fabd %0.4s, %1.4s, %2.4s"
4899 : "=w"(result)
4900 : "w"(a), "w"(b)
4901 : /* No clobbers */);
4902 return result;
4905 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
4906 vabdq_f64 (float64x2_t a, float64x2_t b)
4908 float64x2_t result;
4909 __asm__ ("fabd %0.2d, %1.2d, %2.2d"
4910 : "=w"(result)
4911 : "w"(a), "w"(b)
4912 : /* No clobbers */);
4913 return result;
4916 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
4917 vabdq_s8 (int8x16_t a, int8x16_t b)
4919 int8x16_t result;
4920 __asm__ ("sabd %0.16b, %1.16b, %2.16b"
4921 : "=w"(result)
4922 : "w"(a), "w"(b)
4923 : /* No clobbers */);
4924 return result;
4927 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
4928 vabdq_s16 (int16x8_t a, int16x8_t b)
4930 int16x8_t result;
4931 __asm__ ("sabd %0.8h, %1.8h, %2.8h"
4932 : "=w"(result)
4933 : "w"(a), "w"(b)
4934 : /* No clobbers */);
4935 return result;
4938 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
4939 vabdq_s32 (int32x4_t a, int32x4_t b)
4941 int32x4_t result;
4942 __asm__ ("sabd %0.4s, %1.4s, %2.4s"
4943 : "=w"(result)
4944 : "w"(a), "w"(b)
4945 : /* No clobbers */);
4946 return result;
4949 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
4950 vabdq_u8 (uint8x16_t a, uint8x16_t b)
4952 uint8x16_t result;
4953 __asm__ ("uabd %0.16b, %1.16b, %2.16b"
4954 : "=w"(result)
4955 : "w"(a), "w"(b)
4956 : /* No clobbers */);
4957 return result;
4960 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
4961 vabdq_u16 (uint16x8_t a, uint16x8_t b)
4963 uint16x8_t result;
4964 __asm__ ("uabd %0.8h, %1.8h, %2.8h"
4965 : "=w"(result)
4966 : "w"(a), "w"(b)
4967 : /* No clobbers */);
4968 return result;
4971 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
4972 vabdq_u32 (uint32x4_t a, uint32x4_t b)
4974 uint32x4_t result;
4975 __asm__ ("uabd %0.4s, %1.4s, %2.4s"
4976 : "=w"(result)
4977 : "w"(a), "w"(b)
4978 : /* No clobbers */);
4979 return result;
4982 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
4983 vabds_f32 (float32_t a, float32_t b)
4985 float32_t result;
4986 __asm__ ("fabd %s0, %s1, %s2"
4987 : "=w"(result)
4988 : "w"(a), "w"(b)
4989 : /* No clobbers */);
4990 return result;
4993 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
4994 vaddlv_s8 (int8x8_t a)
4996 int16_t result;
4997 __asm__ ("saddlv %h0,%1.8b"
4998 : "=w"(result)
4999 : "w"(a)
5000 : /* No clobbers */);
5001 return result;
5004 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5005 vaddlv_s16 (int16x4_t a)
5007 int32_t result;
5008 __asm__ ("saddlv %s0,%1.4h"
5009 : "=w"(result)
5010 : "w"(a)
5011 : /* No clobbers */);
5012 return result;
5015 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5016 vaddlv_u8 (uint8x8_t a)
5018 uint16_t result;
5019 __asm__ ("uaddlv %h0,%1.8b"
5020 : "=w"(result)
5021 : "w"(a)
5022 : /* No clobbers */);
5023 return result;
5026 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5027 vaddlv_u16 (uint16x4_t a)
5029 uint32_t result;
5030 __asm__ ("uaddlv %s0,%1.4h"
5031 : "=w"(result)
5032 : "w"(a)
5033 : /* No clobbers */);
5034 return result;
5037 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
5038 vaddlvq_s8 (int8x16_t a)
5040 int16_t result;
5041 __asm__ ("saddlv %h0,%1.16b"
5042 : "=w"(result)
5043 : "w"(a)
5044 : /* No clobbers */);
5045 return result;
5048 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
5049 vaddlvq_s16 (int16x8_t a)
5051 int32_t result;
5052 __asm__ ("saddlv %s0,%1.8h"
5053 : "=w"(result)
5054 : "w"(a)
5055 : /* No clobbers */);
5056 return result;
5059 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
5060 vaddlvq_s32 (int32x4_t a)
5062 int64_t result;
5063 __asm__ ("saddlv %d0,%1.4s"
5064 : "=w"(result)
5065 : "w"(a)
5066 : /* No clobbers */);
5067 return result;
5070 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
5071 vaddlvq_u8 (uint8x16_t a)
5073 uint16_t result;
5074 __asm__ ("uaddlv %h0,%1.16b"
5075 : "=w"(result)
5076 : "w"(a)
5077 : /* No clobbers */);
5078 return result;
5081 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
5082 vaddlvq_u16 (uint16x8_t a)
5084 uint32_t result;
5085 __asm__ ("uaddlv %s0,%1.8h"
5086 : "=w"(result)
5087 : "w"(a)
5088 : /* No clobbers */);
5089 return result;
5092 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
5093 vaddlvq_u32 (uint32x4_t a)
5095 uint64_t result;
5096 __asm__ ("uaddlv %d0,%1.4s"
5097 : "=w"(result)
5098 : "w"(a)
5099 : /* No clobbers */);
5100 return result;
5103 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5104 vcls_s8 (int8x8_t a)
5106 int8x8_t result;
5107 __asm__ ("cls %0.8b,%1.8b"
5108 : "=w"(result)
5109 : "w"(a)
5110 : /* No clobbers */);
5111 return result;
5114 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5115 vcls_s16 (int16x4_t a)
5117 int16x4_t result;
5118 __asm__ ("cls %0.4h,%1.4h"
5119 : "=w"(result)
5120 : "w"(a)
5121 : /* No clobbers */);
5122 return result;
5125 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5126 vcls_s32 (int32x2_t a)
5128 int32x2_t result;
5129 __asm__ ("cls %0.2s,%1.2s"
5130 : "=w"(result)
5131 : "w"(a)
5132 : /* No clobbers */);
5133 return result;
5136 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5137 vclsq_s8 (int8x16_t a)
5139 int8x16_t result;
5140 __asm__ ("cls %0.16b,%1.16b"
5141 : "=w"(result)
5142 : "w"(a)
5143 : /* No clobbers */);
5144 return result;
5147 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
5148 vclsq_s16 (int16x8_t a)
5150 int16x8_t result;
5151 __asm__ ("cls %0.8h,%1.8h"
5152 : "=w"(result)
5153 : "w"(a)
5154 : /* No clobbers */);
5155 return result;
5158 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
5159 vclsq_s32 (int32x4_t a)
5161 int32x4_t result;
5162 __asm__ ("cls %0.4s,%1.4s"
5163 : "=w"(result)
5164 : "w"(a)
5165 : /* No clobbers */);
5166 return result;
5169 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5170 vcnt_p8 (poly8x8_t a)
5172 poly8x8_t result;
5173 __asm__ ("cnt %0.8b,%1.8b"
5174 : "=w"(result)
5175 : "w"(a)
5176 : /* No clobbers */);
5177 return result;
5180 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5181 vcnt_s8 (int8x8_t a)
5183 int8x8_t result;
5184 __asm__ ("cnt %0.8b,%1.8b"
5185 : "=w"(result)
5186 : "w"(a)
5187 : /* No clobbers */);
5188 return result;
5191 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5192 vcnt_u8 (uint8x8_t a)
5194 uint8x8_t result;
5195 __asm__ ("cnt %0.8b,%1.8b"
5196 : "=w"(result)
5197 : "w"(a)
5198 : /* No clobbers */);
5199 return result;
5202 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
5203 vcntq_p8 (poly8x16_t a)
5205 poly8x16_t result;
5206 __asm__ ("cnt %0.16b,%1.16b"
5207 : "=w"(result)
5208 : "w"(a)
5209 : /* No clobbers */);
5210 return result;
5213 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
5214 vcntq_s8 (int8x16_t a)
5216 int8x16_t result;
5217 __asm__ ("cnt %0.16b,%1.16b"
5218 : "=w"(result)
5219 : "w"(a)
5220 : /* No clobbers */);
5221 return result;
5224 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
5225 vcntq_u8 (uint8x16_t a)
5227 uint8x16_t result;
5228 __asm__ ("cnt %0.16b,%1.16b"
5229 : "=w"(result)
5230 : "w"(a)
5231 : /* No clobbers */);
5232 return result;
/* vcopyq_lane_*: insert lane d of vector c into lane b of vector a
   (INS).  b and d must be compile-time constants ("i" constraints).
   Statement-expression locals use the reserved "__" namespace: with the
   old names (a_, c_, result) an argument such as vcopyq_lane_s32 (x, 0,
   result, 1) — or any user identifier named a_/c_ — would be captured
   by the macro's own locals and miscompile.  */
#define vcopyq_lane_f32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __c = (c);                                           \
       float32x4_t __a = (a);                                           \
       float32x4_t __result;                                            \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_f64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __c = (c);                                           \
       float64x2_t __a = (a);                                           \
       float64x2_t __result;                                            \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_p8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t __c = (c);                                            \
       poly8x16_t __a = (a);                                            \
       poly8x16_t __result;                                             \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_p16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t __c = (c);                                            \
       poly16x8_t __a = (a);                                            \
       poly16x8_t __result;                                             \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t __c = (c);                                             \
       int8x16_t __a = (a);                                             \
       int8x16_t __result;                                              \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t __c = (c);                                             \
       int16x8_t __a = (a);                                             \
       int16x8_t __result;                                              \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __c = (c);                                             \
       int32x4_t __a = (a);                                             \
       int32x4_t __result;                                              \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_s64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __c = (c);                                             \
       int64x2_t __a = (a);                                             \
       int64x2_t __result;                                              \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u8(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t __c = (c);                                            \
       uint8x16_t __a = (a);                                            \
       uint8x16_t __result;                                             \
       __asm__ ("ins %0.b[%2], %3.b[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t __c = (c);                                            \
       uint16x8_t __a = (a);                                            \
       uint16x8_t __result;                                             \
       __asm__ ("ins %0.h[%2], %3.h[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __c = (c);                                            \
       uint32x4_t __a = (a);                                            \
       uint32x4_t __result;                                             \
       __asm__ ("ins %0.s[%2], %3.s[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcopyq_lane_u64(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __c = (c);                                            \
       uint64x2_t __a = (a);                                            \
       uint64x2_t __result;                                             \
       __asm__ ("ins %0.d[%2], %3.d[%4]"                                \
                : "=w"(__result)                                        \
                : "0"(__a), "i"(b), "w"(__c), "i"(d)                    \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
5391 /* vcvt_f16_f32 not supported */
5393 /* vcvt_f32_f16 not supported */
5395 /* vcvt_high_f16_f32 not supported */
5397 /* vcvt_high_f32_f16 not supported */
5399 static float32x2_t vdup_n_f32 (float32_t);
/* vcvt_n_*: fixed-point <-> float conversions with b fraction bits
   (SCVTF/UCVTF/FCVTZS/FCVTZU, vector form).  b must be a compile-time
   constant.  Locals renamed into the reserved "__" namespace so macro
   arguments named a_/result cannot be captured.  */
#define vcvt_n_f32_s32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t __a = (a);                                             \
       float32x2_t __result;                                            \
       __asm__ ("scvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_f32_u32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t __a = (a);                                            \
       float32x2_t __result;                                            \
       __asm__ ("ucvtf %0.2s, %1.2s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_s32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __a = (a);                                           \
       int32x2_t __result;                                              \
       __asm__ ("fcvtzs %0.2s, %1.2s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvt_n_u32_f32(a, b)                                            \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t __a = (a);                                           \
       uint32x2_t __result;                                             \
       __asm__ ("fcvtzu %0.2s, %1.2s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vcvtd_n_*: scalar 64-bit fixed-point <-> double conversions with b
   fraction bits (SCVTF/UCVTF/FCVTZS/FCVTZU, scalar D-register form).
   Reserved "__" locals prevent macro-argument capture.  */
#define vcvtd_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64_t __a = (a);                                               \
       float64_t __result;                                              \
       __asm__ ("scvtf %d0,%d1,%2"                                      \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtd_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64_t __a = (a);                                              \
       float64_t __result;                                              \
       __asm__ ("ucvtf %d0,%d1,%2"                                      \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtd_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t __a = (a);                                             \
       int64_t __result;                                                \
       __asm__ ("fcvtzs %d0,%d1,%2"                                     \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtd_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64_t __a = (a);                                             \
       uint64_t __result;                                               \
       __asm__ ("fcvtzu %d0,%d1,%2"                                     \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vcvtq_n_*: 128-bit vector fixed-point <-> float conversions with b
   fraction bits (SCVTF/UCVTF/FCVTZS/FCVTZU).  Reserved "__" locals
   prevent macro-argument capture.  */
#define vcvtq_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t __a = (a);                                             \
       float32x4_t __result;                                            \
       __asm__ ("scvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t __a = (a);                                            \
       float32x4_t __result;                                            \
       __asm__ ("ucvtf %0.4s, %1.4s, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f64_s64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t __a = (a);                                             \
       float64x2_t __result;                                            \
       __asm__ ("scvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_f64_u64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t __a = (a);                                            \
       float64x2_t __result;                                            \
       __asm__ ("ucvtf %0.2d, %1.2d, #%2"                               \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __a = (a);                                           \
       int32x4_t __result;                                              \
       __asm__ ("fcvtzs %0.4s, %1.4s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_s64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __a = (a);                                           \
       int64x2_t __result;                                              \
       __asm__ ("fcvtzs %0.2d, %1.2d, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t __a = (a);                                           \
       uint32x4_t __result;                                             \
       __asm__ ("fcvtzu %0.4s, %1.4s, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvtq_n_u64_f64(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t __a = (a);                                           \
       uint64x2_t __result;                                             \
       __asm__ ("fcvtzu %0.2d, %1.2d, #%2"                              \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
/* vcvts_n_*: scalar 32-bit fixed-point <-> float conversions with b
   fraction bits (SCVTF/UCVTF/FCVTZS/FCVTZU, scalar S-register form).
   Reserved "__" locals prevent macro-argument capture.  */
#define vcvts_n_f32_s32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       int32_t __a = (a);                                               \
       float32_t __result;                                              \
       __asm__ ("scvtf %s0,%s1,%2"                                      \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvts_n_f32_u32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint32_t __a = (a);                                              \
       float32_t __result;                                              \
       __asm__ ("ucvtf %s0,%s1,%2"                                      \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvts_n_s32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t __a = (a);                                             \
       int32_t __result;                                                \
       __asm__ ("fcvtzs %s0,%s1,%2"                                     \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })

#define vcvts_n_u32_f32(a, b)                                           \
  __extension__                                                         \
    ({                                                                  \
       float32_t __a = (a);                                             \
       uint32_t __result;                                               \
       __asm__ ("fcvtzu %s0,%s1,%2"                                     \
                : "=w"(__result)                                        \
                : "w"(__a), "i"(b)                                      \
                : /* No clobbers */);                                   \
       __result;                                                        \
     })
5641 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5642 vcvtx_f32_f64 (float64x2_t a)
5644 float32x2_t result;
5645 __asm__ ("fcvtxn %0.2s,%1.2d"
5646 : "=w"(result)
5647 : "w"(a)
5648 : /* No clobbers */);
5649 return result;
5652 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5653 vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b)
5655 float32x4_t result;
5656 __asm__ ("fcvtxn2 %0.4s,%1.2d"
5657 : "=w"(result)
5658 : "w" (b), "0"(a)
5659 : /* No clobbers */);
5660 return result;
5663 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
5664 vcvtxd_f32_f64 (float64_t a)
5666 float32_t result;
5667 __asm__ ("fcvtxn %s0,%d1"
5668 : "=w"(result)
5669 : "w"(a)
5670 : /* No clobbers */);
5671 return result;
5674 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5675 vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5677 float32x2_t result;
5678 __asm__ ("fmla %0.2s,%2.2s,%3.2s"
5679 : "=w"(result)
5680 : "0"(a), "w"(b), "w"(c)
5681 : /* No clobbers */);
5682 return result;
5685 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5686 vfmaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5688 float32x4_t result;
5689 __asm__ ("fmla %0.4s,%2.4s,%3.4s"
5690 : "=w"(result)
5691 : "0"(a), "w"(b), "w"(c)
5692 : /* No clobbers */);
5693 return result;
5696 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5697 vfmaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5699 float64x2_t result;
5700 __asm__ ("fmla %0.2d,%2.2d,%3.2d"
5701 : "=w"(result)
5702 : "0"(a), "w"(b), "w"(c)
5703 : /* No clobbers */);
5704 return result;
5707 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5708 vfma_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
5710 float32x2_t result;
5711 __asm__ ("fmla %0.2s, %2.2s, %3.s[0]"
5712 : "=w"(result)
5713 : "0"(a), "w"(b), "w"(c)
5714 : /* No clobbers */);
5715 return result;
5718 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5719 vfmaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
5721 float32x4_t result;
5722 __asm__ ("fmla %0.4s, %2.4s, %3.s[0]"
5723 : "=w"(result)
5724 : "0"(a), "w"(b), "w"(c)
5725 : /* No clobbers */);
5726 return result;
5729 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5730 vfmaq_n_f64 (float64x2_t a, float64x2_t b, float64_t c)
5732 float64x2_t result;
5733 __asm__ ("fmla %0.2d, %2.2d, %3.d[0]"
5734 : "=w"(result)
5735 : "0"(a), "w"(b), "w"(c)
5736 : /* No clobbers */);
5737 return result;
5740 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5741 vfms_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
5743 float32x2_t result;
5744 __asm__ ("fmls %0.2s,%2.2s,%3.2s"
5745 : "=w"(result)
5746 : "0"(a), "w"(b), "w"(c)
5747 : /* No clobbers */);
5748 return result;
5751 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
5752 vfmsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
5754 float32x4_t result;
5755 __asm__ ("fmls %0.4s,%2.4s,%3.4s"
5756 : "=w"(result)
5757 : "0"(a), "w"(b), "w"(c)
5758 : /* No clobbers */);
5759 return result;
5762 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
5763 vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
5765 float64x2_t result;
5766 __asm__ ("fmls %0.2d,%2.2d,%3.2d"
5767 : "=w"(result)
5768 : "0"(a), "w"(b), "w"(c)
5769 : /* No clobbers */);
5770 return result;
5773 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
5774 vget_high_f32 (float32x4_t a)
5776 float32x2_t result;
5777 __asm__ ("ins %0.d[0], %1.d[1]"
5778 : "=w"(result)
5779 : "w"(a)
5780 : /* No clobbers */);
5781 return result;
5784 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
5785 vget_high_f64 (float64x2_t a)
5787 float64x1_t result;
5788 __asm__ ("ins %0.d[0], %1.d[1]"
5789 : "=w"(result)
5790 : "w"(a)
5791 : /* No clobbers */);
5792 return result;
5795 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
5796 vget_high_p8 (poly8x16_t a)
5798 poly8x8_t result;
5799 __asm__ ("ins %0.d[0], %1.d[1]"
5800 : "=w"(result)
5801 : "w"(a)
5802 : /* No clobbers */);
5803 return result;
5806 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
5807 vget_high_p16 (poly16x8_t a)
5809 poly16x4_t result;
5810 __asm__ ("ins %0.d[0], %1.d[1]"
5811 : "=w"(result)
5812 : "w"(a)
5813 : /* No clobbers */);
5814 return result;
5817 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5818 vget_high_s8 (int8x16_t a)
5820 int8x8_t result;
5821 __asm__ ("ins %0.d[0], %1.d[1]"
5822 : "=w"(result)
5823 : "w"(a)
5824 : /* No clobbers */);
5825 return result;
5828 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5829 vget_high_s16 (int16x8_t a)
5831 int16x4_t result;
5832 __asm__ ("ins %0.d[0], %1.d[1]"
5833 : "=w"(result)
5834 : "w"(a)
5835 : /* No clobbers */);
5836 return result;
5839 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
5840 vget_high_s32 (int32x4_t a)
5842 int32x2_t result;
5843 __asm__ ("ins %0.d[0], %1.d[1]"
5844 : "=w"(result)
5845 : "w"(a)
5846 : /* No clobbers */);
5847 return result;
5850 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
5851 vget_high_s64 (int64x2_t a)
5853 int64x1_t result;
5854 __asm__ ("ins %0.d[0], %1.d[1]"
5855 : "=w"(result)
5856 : "w"(a)
5857 : /* No clobbers */);
5858 return result;
5861 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
5862 vget_high_u8 (uint8x16_t a)
5864 uint8x8_t result;
5865 __asm__ ("ins %0.d[0], %1.d[1]"
5866 : "=w"(result)
5867 : "w"(a)
5868 : /* No clobbers */);
5869 return result;
5872 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
5873 vget_high_u16 (uint16x8_t a)
5875 uint16x4_t result;
5876 __asm__ ("ins %0.d[0], %1.d[1]"
5877 : "=w"(result)
5878 : "w"(a)
5879 : /* No clobbers */);
5880 return result;
5883 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
5884 vget_high_u32 (uint32x4_t a)
5886 uint32x2_t result;
5887 __asm__ ("ins %0.d[0], %1.d[1]"
5888 : "=w"(result)
5889 : "w"(a)
5890 : /* No clobbers */);
5891 return result;
5894 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
5895 vget_high_u64 (uint64x2_t a)
5897 uint64x1_t result;
5898 __asm__ ("ins %0.d[0], %1.d[1]"
5899 : "=w"(result)
5900 : "w"(a)
5901 : /* No clobbers */);
5902 return result;
5905 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
5906 vhsub_s8 (int8x8_t a, int8x8_t b)
5908 int8x8_t result;
5909 __asm__ ("shsub %0.8b, %1.8b, %2.8b"
5910 : "=w"(result)
5911 : "w"(a), "w"(b)
5912 : /* No clobbers */);
5913 return result;
5916 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
5917 vhsub_s16 (int16x4_t a, int16x4_t b)
5919 int16x4_t result;
5920 __asm__ ("shsub %0.4h, %1.4h, %2.4h"
5921 : "=w"(result)
5922 : "w"(a), "w"(b)
5923 : /* No clobbers */);
5924 return result;
/* Halving-subtract intrinsics (A64 SHSUB/UHSUB): each lane of the
   result is (a - b) >> 1, with the subtraction performed at wider
   precision so it cannot overflow.  Implemented as inline asm: the
   template fixes the instruction and lane arrangement, and "w" places
   each operand in an FP/SIMD register.  Non-"q" forms operate on
   64-bit vectors, "q" forms on 128-bit vectors.  */

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vhsub_s32 (int32x2_t a, int32x2_t b)
{
  int32x2_t result;
  __asm__ ("shsub %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vhsub_u8 (uint8x8_t a, uint8x8_t b)
{
  uint8x8_t result;
  __asm__ ("uhsub %0.8b, %1.8b, %2.8b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vhsub_u16 (uint16x4_t a, uint16x4_t b)
{
  uint16x4_t result;
  __asm__ ("uhsub %0.4h, %1.4h, %2.4h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vhsub_u32 (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t result;
  __asm__ ("uhsub %0.2s, %1.2s, %2.2s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vhsubq_s8 (int8x16_t a, int8x16_t b)
{
  int8x16_t result;
  __asm__ ("shsub %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vhsubq_s16 (int16x8_t a, int16x8_t b)
{
  int16x8_t result;
  __asm__ ("shsub %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vhsubq_s32 (int32x4_t a, int32x4_t b)
{
  int32x4_t result;
  __asm__ ("shsub %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vhsubq_u8 (uint8x16_t a, uint8x16_t b)
{
  uint8x16_t result;
  __asm__ ("uhsub %0.16b, %1.16b, %2.16b"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vhsubq_u16 (uint16x8_t a, uint16x8_t b)
{
  uint16x8_t result;
  __asm__ ("uhsub %0.8h, %1.8h, %2.8h"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vhsubq_u32 (uint32x4_t a, uint32x4_t b)
{
  uint32x4_t result;
  __asm__ ("uhsub %0.4s, %1.4s, %2.4s"
           : "=w"(result)
           : "w"(a), "w"(b)
           : /* No clobbers */);
  return result;
}
/* Load-and-replicate intrinsics, 64-bit vector forms (A64 LD1R):
   load one element from *a and broadcast it to every lane of the
   result.  "Utv" is the AArch64 memory constraint for a single
   structure-load address; dereferencing *a in the constraint tells
   GCC the load reads through the pointer.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vld1_dup_f32 (const float32_t * a)
{
  float32x2_t result;
  __asm__ ("ld1r {%0.2s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vld1_dup_f64 (const float64_t * a)
{
  float64x1_t result;
  __asm__ ("ld1r {%0.1d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vld1_dup_p8 (const poly8_t * a)
{
  poly8x8_t result;
  __asm__ ("ld1r {%0.8b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vld1_dup_p16 (const poly16_t * a)
{
  poly16x4_t result;
  __asm__ ("ld1r {%0.4h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vld1_dup_s8 (const int8_t * a)
{
  int8x8_t result;
  __asm__ ("ld1r {%0.8b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vld1_dup_s16 (const int16_t * a)
{
  int16x4_t result;
  __asm__ ("ld1r {%0.4h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vld1_dup_s32 (const int32_t * a)
{
  int32x2_t result;
  __asm__ ("ld1r {%0.2s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vld1_dup_s64 (const int64_t * a)
{
  int64x1_t result;
  __asm__ ("ld1r {%0.1d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vld1_dup_u8 (const uint8_t * a)
{
  uint8x8_t result;
  __asm__ ("ld1r {%0.8b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vld1_dup_u16 (const uint16_t * a)
{
  uint16x4_t result;
  __asm__ ("ld1r {%0.4h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vld1_dup_u32 (const uint32_t * a)
{
  uint32x2_t result;
  __asm__ ("ld1r {%0.2s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vld1_dup_u64 (const uint64_t * a)
{
  uint64x1_t result;
  __asm__ ("ld1r {%0.1d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}
/* Load-to-lane intrinsics, 64-bit vector forms (A64 LD1 to one lane):
   load a single element from *a into lane c of vector b, leaving the
   other lanes unchanged.  These are macros (statement expressions)
   rather than functions because the lane number must be a
   compile-time constant — it is passed with the "i" constraint and
   substituted into the instruction's lane index.  "0"(b_) ties the
   input vector to the output register so the untouched lanes are
   preserved.  */

#define vld1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x2_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x1_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       const poly8_t * a_ = (a);                                        \
       poly8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       const int8_t * a_ = (a);                                         \
       int8x8_t result;                                                 \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x4_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x2_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x1_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       const uint8_t * a_ = (a);                                        \
       uint8x8_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x4_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x2_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x1_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i" (c), "Utv"(*a_), "0"(b_)                          \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Load-and-replicate intrinsics, 128-bit vector forms (A64 LD1R):
   load one element from *a and broadcast it to every lane of the
   full "q" register.  Same pattern as the 64-bit vld1_dup_* family,
   only the lane arrangement differs.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vld1q_dup_f32 (const float32_t * a)
{
  float32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vld1q_dup_f64 (const float64_t * a)
{
  float64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vld1q_dup_p8 (const poly8_t * a)
{
  poly8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vld1q_dup_p16 (const poly16_t * a)
{
  poly16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vld1q_dup_s8 (const int8_t * a)
{
  int8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vld1q_dup_s16 (const int16_t * a)
{
  int16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vld1q_dup_s32 (const int32_t * a)
{
  int32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vld1q_dup_s64 (const int64_t * a)
{
  int64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vld1q_dup_u8 (const uint8_t * a)
{
  uint8x16_t result;
  __asm__ ("ld1r {%0.16b}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vld1q_dup_u16 (const uint16_t * a)
{
  uint16x8_t result;
  __asm__ ("ld1r {%0.8h}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vld1q_dup_u32 (const uint32_t * a)
{
  uint32x4_t result;
  __asm__ ("ld1r {%0.4s}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vld1q_dup_u64 (const uint64_t * a)
{
  uint64x2_t result;
  __asm__ ("ld1r {%0.2d}, %1"
           : "=w"(result)
           : "Utv"(*a)
           : /* No clobbers */);
  return result;
}
/* Load-to-lane intrinsics, 128-bit vector forms: load a single
   element from *a into lane c of vector b, preserving all other
   lanes.  Macros, not functions, because the lane number c must be a
   compile-time constant ("i" constraint).  "0"(b_) ties the input
   vector to the output so the untouched lanes carry over.  */

#define vld1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       const float32_t * a_ = (a);                                      \
       float32x4_t result;                                              \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       const float64_t * a_ = (a);                                      \
       float64x2_t result;                                              \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       const poly8_t * a_ = (a);                                        \
       poly8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       const poly16_t * a_ = (a);                                       \
       poly16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       const int8_t * a_ = (a);                                         \
       int8x16_t result;                                                \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       const int16_t * a_ = (a);                                        \
       int16x8_t result;                                                \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       const int32_t * a_ = (a);                                        \
       int32x4_t result;                                                \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       const int64_t * a_ = (a);                                        \
       int64x2_t result;                                                \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       const uint8_t * a_ = (a);                                        \
       uint8x16_t result;                                               \
       __asm__ ("ld1 {%0.b}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       const uint16_t * a_ = (a);                                       \
       uint16x8_t result;                                               \
       __asm__ ("ld1 {%0.h}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       const uint32_t * a_ = (a);                                       \
       uint32x4_t result;                                               \
       __asm__ ("ld1 {%0.s}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vld1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       const uint64_t * a_ = (a);                                       \
       uint64x2_t result;                                               \
       __asm__ ("ld1 {%0.d}[%1], %2"                                    \
                : "=w"(result)                                          \
                : "i"(c), "Utv"(*a_), "0"(b_)                           \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Multiply-accumulate by scalar, 64-bit vector forms:
   result = a + b * c, with scalar c broadcast across the lanes.
   Integer variants use MLA with a by-element operand (lane 0 of the
   register holding c); the 16-bit forms use the "x" constraint
   because the by-element H form can only encode the lower SIMD
   registers.  The f32 variant issues a discrete FMUL+FADD pair
   rather than a fused FMLA, so the intermediate product is rounded —
   matching vmla's non-fused definition.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
{
  float32x2_t result;
  float32x2_t t1;                    /* scratch for the rounded product  */
  __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Element-wise multiply-accumulate, 64-bit vector forms (A64 MLA):
   result = a + b * c, per lane.  "0"(a) ties the accumulator input
   to the destination register, as MLA accumulates in place.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
{
  int8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
{
  int16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
{
  int32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint8x8_t result;
  __asm__ ("mla %0.8b, %2.8b, %3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint16x4_t result;
  __asm__ ("mla %0.4h, %2.4h, %3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint32x2_t result;
  __asm__ ("mla %0.2s, %2.2s, %3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Widening multiply-accumulate from the HIGH half, by lane
   (A64 SMLAL2/UMLAL2 by-element): result = a + (high half of b) *
   c[d], lanes widened to twice the element size.  "_lane" variants
   take the multiplier lane from a 64-bit vector, "_laneq" from a
   128-bit vector.  Macros, because both the lane index d ("i"
   constraint) must be a compile-time constant.  The 16-bit
   multiplier uses "x" (lower SIMD registers only, as required by the
   by-element H form); "0"(a_) ties the accumulator to the
   destination.  */

#define vmlal_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Widening multiply-accumulate from the HIGH half, by scalar
   (A64 SMLAL2/UMLAL2 by-element, lane 0): result = a + (high half
   of b) * c, lanes widened to twice the element size.  16-bit
   scalar operands use "x" (by-element H form register restriction);
   "0"(a) ties the accumulator to the destination.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Widening multiply-accumulate from the HIGH half, element-wise
   (A64 SMLAL2/UMLAL2): result = a + (high half of b) * (high half
   of c), lanes widened to twice the element size.  "0"(a) ties the
   accumulator input to the destination register.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
{
  int16x8_t result;
  __asm__ ("smlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
{
  int32x4_t result;
  __asm__ ("smlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
{
  int64x2_t result;
  __asm__ ("smlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
{
  uint16x8_t result;
  __asm__ ("umlal2 %0.8h,%2.16b,%3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
{
  uint32x4_t result;
  __asm__ ("umlal2 %0.4s,%2.8h,%3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
{
  uint64x2_t result;
  __asm__ ("umlal2 %0.2d,%2.4s,%3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Widening multiply-accumulate by lane (A64 SMLAL/UMLAL by-element):
   result = a + b * c[d], lanes widened to twice the element size.
   "_lane" variants index a 64-bit multiplier vector, "_laneq" a
   128-bit one.  Macros, because the lane index d must be a
   compile-time constant ("i" constraint).  16-bit multipliers use
   "x" (by-element H form register restriction); "0"(a_) ties the
   accumulator to the destination.  */

#define vmlal_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]"                            \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlal_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Widening multiply-accumulate by scalar (A64 SMLAL/UMLAL
   by-element, lane 0): result = a + b * c, lanes widened to twice
   the element size.  16-bit scalar operands use "x" (by-element H
   form register restriction); "0"(a) ties the accumulator to the
   destination.  */

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Widening multiply-accumulate, element-wise (A64 SMLAL/UMLAL):
   result = a + b * c, with the products widened to twice the input
   element size.  "0"(a) ties the accumulator input to the
   destination register.  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
{
  int16x8_t result;
  __asm__ ("smlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
{
  int32x4_t result;
  __asm__ ("smlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
{
  int64x2_t result;
  __asm__ ("smlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
{
  uint16x8_t result;
  __asm__ ("umlal %0.8h,%2.8b,%3.8b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
{
  uint32x4_t result;
  __asm__ ("umlal %0.4s,%2.4h,%3.4h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
{
  uint64x2_t result;
  __asm__ ("umlal %0.2d,%2.2s,%3.2s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Multiply-accumulate by scalar, 128-bit vector forms:
   result = a + b * c, with scalar c broadcast across the lanes.
   Same structure as the 64-bit vmla_n_* family: integer variants use
   MLA by-element (16-bit forms with "x" for the restricted register
   file), and the f32 variant issues a discrete FMUL+FADD pair
   rather than a fused FMLA, so the intermediate product is rounded —
   matching vmla's non-fused definition.  */

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
{
  float32x4_t result;
  float32x4_t t1;                    /* scratch for the rounded product  */
  __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s"
           : "=w"(result), "=w"(t1)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h,%2.8h,%3.h[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "x"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s,%2.4s,%3.s[0]"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
/* Element-wise multiply-accumulate, 128-bit vector forms (A64 MLA):
   result = a + b * c, per lane.  "0"(a) ties the accumulator input
   to the destination register, as MLA accumulates in place.  */

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
{
  int8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
{
  int16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
{
  uint8x16_t result;
  __asm__ ("mla %0.16b, %2.16b, %3.16b"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
{
  uint16x8_t result;
  __asm__ ("mla %0.8h, %2.8h, %3.8h"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
{
  uint32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w"(result)
           : "0"(a), "w"(b), "w"(c)
           : /* No clobbers */);
  return result;
}
7301 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
7302 vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c)
7304 float32x2_t result;
7305 float32x2_t t1;
7306 __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s"
7307 : "=w"(result), "=w"(t1)
7308 : "0"(a), "w"(b), "w"(c)
7309 : /* No clobbers */);
7310 return result;
7313 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7314 vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c)
7316 int16x4_t result;
7317 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7318 : "=w"(result)
7319 : "0"(a), "w"(b), "x"(c)
7320 : /* No clobbers */);
7321 return result;
7324 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7325 vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c)
7327 int32x2_t result;
7328 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7329 : "=w"(result)
7330 : "0"(a), "w"(b), "w"(c)
7331 : /* No clobbers */);
7332 return result;
7335 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7336 vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c)
7338 uint16x4_t result;
7339 __asm__ ("mls %0.4h, %2.4h, %3.h[0]"
7340 : "=w"(result)
7341 : "0"(a), "w"(b), "x"(c)
7342 : /* No clobbers */);
7343 return result;
7346 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7347 vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c)
7349 uint32x2_t result;
7350 __asm__ ("mls %0.2s, %2.2s, %3.s[0]"
7351 : "=w"(result)
7352 : "0"(a), "w"(b), "w"(c)
7353 : /* No clobbers */);
7354 return result;
7357 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
7358 vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c)
7360 int8x8_t result;
7361 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7362 : "=w"(result)
7363 : "0"(a), "w"(b), "w"(c)
7364 : /* No clobbers */);
7365 return result;
7368 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
7369 vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c)
7371 int16x4_t result;
7372 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7373 : "=w"(result)
7374 : "0"(a), "w"(b), "w"(c)
7375 : /* No clobbers */);
7376 return result;
7379 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
7380 vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c)
7382 int32x2_t result;
7383 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7384 : "=w"(result)
7385 : "0"(a), "w"(b), "w"(c)
7386 : /* No clobbers */);
7387 return result;
7390 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
7391 vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c)
7393 uint8x8_t result;
7394 __asm__ ("mls %0.8b,%2.8b,%3.8b"
7395 : "=w"(result)
7396 : "0"(a), "w"(b), "w"(c)
7397 : /* No clobbers */);
7398 return result;
7401 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
7402 vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c)
7404 uint16x4_t result;
7405 __asm__ ("mls %0.4h,%2.4h,%3.4h"
7406 : "=w"(result)
7407 : "0"(a), "w"(b), "w"(c)
7408 : /* No clobbers */);
7409 return result;
7412 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
7413 vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c)
7415 uint32x2_t result;
7416 __asm__ ("mls %0.2s,%2.2s,%3.2s"
7417 : "=w"(result)
7418 : "0"(a), "w"(b), "w"(c)
7419 : /* No clobbers */);
7420 return result;
/* vmlsl_high_lane_{s16,s32,u16,u32}: widening multiply-subtract of the
   high halves of B against lane D of C, accumulating into A, via
   SMLSL2/UMLSL2.  Macros (not functions) because the lane index D must
   be an immediate ("i" constraint).  */

#define vmlsl_high_lane_s16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_s32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u16(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_lane_u32(a, b, c, d)                                 \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmlsl_high_laneq_{s16,s32,u16,u32}: like vmlsl_high_lane_* but the
   lane operand C is a full 128-bit ("q") vector.  Macros because the
   lane index D must be an immediate.  */

#define vmlsl_high_laneq_s16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x8_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_s32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x4_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u16(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x8_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_high_laneq_u32(a, b, c, d)                                \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x4_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]"                         \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7535 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7536 vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c)
7538 int32x4_t result;
7539 __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]"
7540 : "=w"(result)
7541 : "0"(a), "w"(b), "x"(c)
7542 : /* No clobbers */);
7543 return result;
7546 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7547 vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c)
7549 int64x2_t result;
7550 __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]"
7551 : "=w"(result)
7552 : "0"(a), "w"(b), "w"(c)
7553 : /* No clobbers */);
7554 return result;
7557 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7558 vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c)
7560 uint32x4_t result;
7561 __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]"
7562 : "=w"(result)
7563 : "0"(a), "w"(b), "x"(c)
7564 : /* No clobbers */);
7565 return result;
7568 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7569 vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c)
7571 uint64x2_t result;
7572 __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]"
7573 : "=w"(result)
7574 : "0"(a), "w"(b), "w"(c)
7575 : /* No clobbers */);
7576 return result;
7579 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7580 vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c)
7582 int16x8_t result;
7583 __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b"
7584 : "=w"(result)
7585 : "0"(a), "w"(b), "w"(c)
7586 : /* No clobbers */);
7587 return result;
7590 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7591 vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c)
7593 int32x4_t result;
7594 __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h"
7595 : "=w"(result)
7596 : "0"(a), "w"(b), "w"(c)
7597 : /* No clobbers */);
7598 return result;
7601 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7602 vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c)
7604 int64x2_t result;
7605 __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s"
7606 : "=w"(result)
7607 : "0"(a), "w"(b), "w"(c)
7608 : /* No clobbers */);
7609 return result;
7612 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7613 vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c)
7615 uint16x8_t result;
7616 __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b"
7617 : "=w"(result)
7618 : "0"(a), "w"(b), "w"(c)
7619 : /* No clobbers */);
7620 return result;
7623 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7624 vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c)
7626 uint32x4_t result;
7627 __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h"
7628 : "=w"(result)
7629 : "0"(a), "w"(b), "w"(c)
7630 : /* No clobbers */);
7631 return result;
7634 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7635 vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c)
7637 uint64x2_t result;
7638 __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s"
7639 : "=w"(result)
7640 : "0"(a), "w"(b), "w"(c)
7641 : /* No clobbers */);
7642 return result;
/* vmlsl_lane_{s16,s32,u16,u32}: widening multiply-subtract of B
   against lane D of C, accumulating into A, via SMLSL/UMLSL.
   Macros because the lane index D must be an immediate.  */

#define vmlsl_lane_s16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_s32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u16(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_lane_u32(a, b, c, d)                                      \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmlsl_laneq_{s16,s32,u16,u32}: like vmlsl_lane_* but the lane
   operand C is a full 128-bit ("q") vector.  Macros because the lane
   index D must be an immediate.  */

#define vmlsl_laneq_s16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t c_ = (c);                                              \
       int16x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_s32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t c_ = (c);                                              \
       int32x2_t b_ = (b);                                              \
       int64x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u16(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t c_ = (c);                                             \
       uint16x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "x"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmlsl_laneq_u32(a, b, c, d)                                     \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t c_ = (c);                                             \
       uint32x2_t b_ = (b);                                             \
       uint64x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]"                          \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "w"(c_), "i"(d)                     \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
7757 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7758 vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c)
7760 int32x4_t result;
7761 __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]"
7762 : "=w"(result)
7763 : "0"(a), "w"(b), "x"(c)
7764 : /* No clobbers */);
7765 return result;
7768 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7769 vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c)
7771 int64x2_t result;
7772 __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]"
7773 : "=w"(result)
7774 : "0"(a), "w"(b), "w"(c)
7775 : /* No clobbers */);
7776 return result;
7779 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7780 vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c)
7782 uint32x4_t result;
7783 __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]"
7784 : "=w"(result)
7785 : "0"(a), "w"(b), "x"(c)
7786 : /* No clobbers */);
7787 return result;
7790 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7791 vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c)
7793 uint64x2_t result;
7794 __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]"
7795 : "=w"(result)
7796 : "0"(a), "w"(b), "w"(c)
7797 : /* No clobbers */);
7798 return result;
7801 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7802 vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c)
7804 int16x8_t result;
7805 __asm__ ("smlsl %0.8h, %2.8b, %3.8b"
7806 : "=w"(result)
7807 : "0"(a), "w"(b), "w"(c)
7808 : /* No clobbers */);
7809 return result;
7812 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7813 vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c)
7815 int32x4_t result;
7816 __asm__ ("smlsl %0.4s, %2.4h, %3.4h"
7817 : "=w"(result)
7818 : "0"(a), "w"(b), "w"(c)
7819 : /* No clobbers */);
7820 return result;
7823 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
7824 vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c)
7826 int64x2_t result;
7827 __asm__ ("smlsl %0.2d, %2.2s, %3.2s"
7828 : "=w"(result)
7829 : "0"(a), "w"(b), "w"(c)
7830 : /* No clobbers */);
7831 return result;
7834 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7835 vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c)
7837 uint16x8_t result;
7838 __asm__ ("umlsl %0.8h, %2.8b, %3.8b"
7839 : "=w"(result)
7840 : "0"(a), "w"(b), "w"(c)
7841 : /* No clobbers */);
7842 return result;
7845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7846 vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c)
7848 uint32x4_t result;
7849 __asm__ ("umlsl %0.4s, %2.4h, %3.4h"
7850 : "=w"(result)
7851 : "0"(a), "w"(b), "w"(c)
7852 : /* No clobbers */);
7853 return result;
7856 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
7857 vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c)
7859 uint64x2_t result;
7860 __asm__ ("umlsl %0.2d, %2.2s, %3.2s"
7861 : "=w"(result)
7862 : "0"(a), "w"(b), "w"(c)
7863 : /* No clobbers */);
7864 return result;
7867 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
7868 vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c)
7870 float32x4_t result;
7871 float32x4_t t1;
7872 __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s"
7873 : "=w"(result), "=w"(t1)
7874 : "0"(a), "w"(b), "w"(c)
7875 : /* No clobbers */);
7876 return result;
7879 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7880 vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c)
7882 int16x8_t result;
7883 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7884 : "=w"(result)
7885 : "0"(a), "w"(b), "x"(c)
7886 : /* No clobbers */);
7887 return result;
7890 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7891 vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c)
7893 int32x4_t result;
7894 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7895 : "=w"(result)
7896 : "0"(a), "w"(b), "w"(c)
7897 : /* No clobbers */);
7898 return result;
7901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7902 vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c)
7904 uint16x8_t result;
7905 __asm__ ("mls %0.8h, %2.8h, %3.h[0]"
7906 : "=w"(result)
7907 : "0"(a), "w"(b), "x"(c)
7908 : /* No clobbers */);
7909 return result;
7912 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7913 vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c)
7915 uint32x4_t result;
7916 __asm__ ("mls %0.4s, %2.4s, %3.s[0]"
7917 : "=w"(result)
7918 : "0"(a), "w"(b), "w"(c)
7919 : /* No clobbers */);
7920 return result;
7923 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
7924 vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c)
7926 int8x16_t result;
7927 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7928 : "=w"(result)
7929 : "0"(a), "w"(b), "w"(c)
7930 : /* No clobbers */);
7931 return result;
7934 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7935 vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c)
7937 int16x8_t result;
7938 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7939 : "=w"(result)
7940 : "0"(a), "w"(b), "w"(c)
7941 : /* No clobbers */);
7942 return result;
7945 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
7946 vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c)
7948 int32x4_t result;
7949 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7950 : "=w"(result)
7951 : "0"(a), "w"(b), "w"(c)
7952 : /* No clobbers */);
7953 return result;
7956 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
7957 vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
7959 uint8x16_t result;
7960 __asm__ ("mls %0.16b,%2.16b,%3.16b"
7961 : "=w"(result)
7962 : "0"(a), "w"(b), "w"(c)
7963 : /* No clobbers */);
7964 return result;
7967 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
7968 vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c)
7970 uint16x8_t result;
7971 __asm__ ("mls %0.8h,%2.8h,%3.8h"
7972 : "=w"(result)
7973 : "0"(a), "w"(b), "w"(c)
7974 : /* No clobbers */);
7975 return result;
7978 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
7979 vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
7981 uint32x4_t result;
7982 __asm__ ("mls %0.4s,%2.4s,%3.4s"
7983 : "=w"(result)
7984 : "0"(a), "w"(b), "w"(c)
7985 : /* No clobbers */);
7986 return result;
7989 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
7990 vmovl_high_s8 (int8x16_t a)
7992 int16x8_t result;
7993 __asm__ ("sshll2 %0.8h,%1.16b,#0"
7994 : "=w"(result)
7995 : "w"(a)
7996 : /* No clobbers */);
7997 return result;
8000 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8001 vmovl_high_s16 (int16x8_t a)
8003 int32x4_t result;
8004 __asm__ ("sshll2 %0.4s,%1.8h,#0"
8005 : "=w"(result)
8006 : "w"(a)
8007 : /* No clobbers */);
8008 return result;
8011 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8012 vmovl_high_s32 (int32x4_t a)
8014 int64x2_t result;
8015 __asm__ ("sshll2 %0.2d,%1.4s,#0"
8016 : "=w"(result)
8017 : "w"(a)
8018 : /* No clobbers */);
8019 return result;
8022 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8023 vmovl_high_u8 (uint8x16_t a)
8025 uint16x8_t result;
8026 __asm__ ("ushll2 %0.8h,%1.16b,#0"
8027 : "=w"(result)
8028 : "w"(a)
8029 : /* No clobbers */);
8030 return result;
8033 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8034 vmovl_high_u16 (uint16x8_t a)
8036 uint32x4_t result;
8037 __asm__ ("ushll2 %0.4s,%1.8h,#0"
8038 : "=w"(result)
8039 : "w"(a)
8040 : /* No clobbers */);
8041 return result;
8044 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8045 vmovl_high_u32 (uint32x4_t a)
8047 uint64x2_t result;
8048 __asm__ ("ushll2 %0.2d,%1.4s,#0"
8049 : "=w"(result)
8050 : "w"(a)
8051 : /* No clobbers */);
8052 return result;
8055 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8056 vmovl_s8 (int8x8_t a)
8058 int16x8_t result;
8059 __asm__ ("sshll %0.8h,%1.8b,#0"
8060 : "=w"(result)
8061 : "w"(a)
8062 : /* No clobbers */);
8063 return result;
8066 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8067 vmovl_s16 (int16x4_t a)
8069 int32x4_t result;
8070 __asm__ ("sshll %0.4s,%1.4h,#0"
8071 : "=w"(result)
8072 : "w"(a)
8073 : /* No clobbers */);
8074 return result;
8077 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8078 vmovl_s32 (int32x2_t a)
8080 int64x2_t result;
8081 __asm__ ("sshll %0.2d,%1.2s,#0"
8082 : "=w"(result)
8083 : "w"(a)
8084 : /* No clobbers */);
8085 return result;
8088 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8089 vmovl_u8 (uint8x8_t a)
8091 uint16x8_t result;
8092 __asm__ ("ushll %0.8h,%1.8b,#0"
8093 : "=w"(result)
8094 : "w"(a)
8095 : /* No clobbers */);
8096 return result;
8099 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8100 vmovl_u16 (uint16x4_t a)
8102 uint32x4_t result;
8103 __asm__ ("ushll %0.4s,%1.4h,#0"
8104 : "=w"(result)
8105 : "w"(a)
8106 : /* No clobbers */);
8107 return result;
8110 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8111 vmovl_u32 (uint32x2_t a)
8113 uint64x2_t result;
8114 __asm__ ("ushll %0.2d,%1.2s,#0"
8115 : "=w"(result)
8116 : "w"(a)
8117 : /* No clobbers */);
8118 return result;
8121 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
8122 vmovn_high_s16 (int8x8_t a, int16x8_t b)
8124 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
8125 __asm__ ("xtn2 %0.16b,%1.8h"
8126 : "+w"(result)
8127 : "w"(b)
8128 : /* No clobbers */);
8129 return result;
8132 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8133 vmovn_high_s32 (int16x4_t a, int32x4_t b)
8135 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
8136 __asm__ ("xtn2 %0.8h,%1.4s"
8137 : "+w"(result)
8138 : "w"(b)
8139 : /* No clobbers */);
8140 return result;
8143 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8144 vmovn_high_s64 (int32x2_t a, int64x2_t b)
8146 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
8147 __asm__ ("xtn2 %0.4s,%1.2d"
8148 : "+w"(result)
8149 : "w"(b)
8150 : /* No clobbers */);
8151 return result;
8154 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
8155 vmovn_high_u16 (uint8x8_t a, uint16x8_t b)
8157 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
8158 __asm__ ("xtn2 %0.16b,%1.8h"
8159 : "+w"(result)
8160 : "w"(b)
8161 : /* No clobbers */);
8162 return result;
8165 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8166 vmovn_high_u32 (uint16x4_t a, uint32x4_t b)
8168 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
8169 __asm__ ("xtn2 %0.8h,%1.4s"
8170 : "+w"(result)
8171 : "w"(b)
8172 : /* No clobbers */);
8173 return result;
8176 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8177 vmovn_high_u64 (uint32x2_t a, uint64x2_t b)
8179 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
8180 __asm__ ("xtn2 %0.4s,%1.2d"
8181 : "+w"(result)
8182 : "w"(b)
8183 : /* No clobbers */);
8184 return result;
8187 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8188 vmovn_s16 (int16x8_t a)
8190 int8x8_t result;
8191 __asm__ ("xtn %0.8b,%1.8h"
8192 : "=w"(result)
8193 : "w"(a)
8194 : /* No clobbers */);
8195 return result;
8198 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8199 vmovn_s32 (int32x4_t a)
8201 int16x4_t result;
8202 __asm__ ("xtn %0.4h,%1.4s"
8203 : "=w"(result)
8204 : "w"(a)
8205 : /* No clobbers */);
8206 return result;
8209 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8210 vmovn_s64 (int64x2_t a)
8212 int32x2_t result;
8213 __asm__ ("xtn %0.2s,%1.2d"
8214 : "=w"(result)
8215 : "w"(a)
8216 : /* No clobbers */);
8217 return result;
8220 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8221 vmovn_u16 (uint16x8_t a)
8223 uint8x8_t result;
8224 __asm__ ("xtn %0.8b,%1.8h"
8225 : "=w"(result)
8226 : "w"(a)
8227 : /* No clobbers */);
8228 return result;
8231 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8232 vmovn_u32 (uint32x4_t a)
8234 uint16x4_t result;
8235 __asm__ ("xtn %0.4h,%1.4s"
8236 : "=w"(result)
8237 : "w"(a)
8238 : /* No clobbers */);
8239 return result;
8242 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8243 vmovn_u64 (uint64x2_t a)
8245 uint32x2_t result;
8246 __asm__ ("xtn %0.2s,%1.2d"
8247 : "=w"(result)
8248 : "w"(a)
8249 : /* No clobbers */);
8250 return result;
8253 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8254 vmul_n_f32 (float32x2_t a, float32_t b)
8256 float32x2_t result;
8257 __asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
8258 : "=w"(result)
8259 : "w"(a), "w"(b)
8260 : /* No clobbers */);
8261 return result;
8264 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8265 vmul_n_s16 (int16x4_t a, int16_t b)
8267 int16x4_t result;
8268 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8269 : "=w"(result)
8270 : "w"(a), "x"(b)
8271 : /* No clobbers */);
8272 return result;
8275 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8276 vmul_n_s32 (int32x2_t a, int32_t b)
8278 int32x2_t result;
8279 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8280 : "=w"(result)
8281 : "w"(a), "w"(b)
8282 : /* No clobbers */);
8283 return result;
8286 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8287 vmul_n_u16 (uint16x4_t a, uint16_t b)
8289 uint16x4_t result;
8290 __asm__ ("mul %0.4h,%1.4h,%2.h[0]"
8291 : "=w"(result)
8292 : "w"(a), "x"(b)
8293 : /* No clobbers */);
8294 return result;
8297 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8298 vmul_n_u32 (uint32x2_t a, uint32_t b)
8300 uint32x2_t result;
8301 __asm__ ("mul %0.2s,%1.2s,%2.s[0]"
8302 : "=w"(result)
8303 : "w"(a), "w"(b)
8304 : /* No clobbers */);
8305 return result;
/* vmull_high_lane_{s16,s32,u16,u32}: widening multiply of the high
   halves of A against lane C of B, via SMULL2/UMULL2.  Macros because
   the lane index C must be an immediate.  */

#define vmull_high_lane_s16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_s32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u16(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_lane_u32(a, b, c)                                    \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vmull_high_laneq_{s16,s32,u16,u32}: like vmull_high_lane_* but the
   lane operand B is a full 128-bit ("q") vector.  Macros because the
   lane index C must be an immediate.  */

#define vmull_high_laneq_s16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x8_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_s32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x4_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u16(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x8_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_high_laneq_u32(a, b, c)                                   \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x4_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]"                         \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8412 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8413 vmull_high_n_s16 (int16x8_t a, int16_t b)
8415 int32x4_t result;
8416 __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]"
8417 : "=w"(result)
8418 : "w"(a), "x"(b)
8419 : /* No clobbers */);
8420 return result;
8423 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8424 vmull_high_n_s32 (int32x4_t a, int32_t b)
8426 int64x2_t result;
8427 __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]"
8428 : "=w"(result)
8429 : "w"(a), "w"(b)
8430 : /* No clobbers */);
8431 return result;
8434 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8435 vmull_high_n_u16 (uint16x8_t a, uint16_t b)
8437 uint32x4_t result;
8438 __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]"
8439 : "=w"(result)
8440 : "w"(a), "x"(b)
8441 : /* No clobbers */);
8442 return result;
8445 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8446 vmull_high_n_u32 (uint32x4_t a, uint32_t b)
8448 uint64x2_t result;
8449 __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]"
8450 : "=w"(result)
8451 : "w"(a), "w"(b)
8452 : /* No clobbers */);
8453 return result;
8456 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8457 vmull_high_p8 (poly8x16_t a, poly8x16_t b)
8459 poly16x8_t result;
8460 __asm__ ("pmull2 %0.8h,%1.16b,%2.16b"
8461 : "=w"(result)
8462 : "w"(a), "w"(b)
8463 : /* No clobbers */);
8464 return result;
8467 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8468 vmull_high_s8 (int8x16_t a, int8x16_t b)
8470 int16x8_t result;
8471 __asm__ ("smull2 %0.8h,%1.16b,%2.16b"
8472 : "=w"(result)
8473 : "w"(a), "w"(b)
8474 : /* No clobbers */);
8475 return result;
8478 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8479 vmull_high_s16 (int16x8_t a, int16x8_t b)
8481 int32x4_t result;
8482 __asm__ ("smull2 %0.4s,%1.8h,%2.8h"
8483 : "=w"(result)
8484 : "w"(a), "w"(b)
8485 : /* No clobbers */);
8486 return result;
8489 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8490 vmull_high_s32 (int32x4_t a, int32x4_t b)
8492 int64x2_t result;
8493 __asm__ ("smull2 %0.2d,%1.4s,%2.4s"
8494 : "=w"(result)
8495 : "w"(a), "w"(b)
8496 : /* No clobbers */);
8497 return result;
8500 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8501 vmull_high_u8 (uint8x16_t a, uint8x16_t b)
8503 uint16x8_t result;
8504 __asm__ ("umull2 %0.8h,%1.16b,%2.16b"
8505 : "=w"(result)
8506 : "w"(a), "w"(b)
8507 : /* No clobbers */);
8508 return result;
8511 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8512 vmull_high_u16 (uint16x8_t a, uint16x8_t b)
8514 uint32x4_t result;
8515 __asm__ ("umull2 %0.4s,%1.8h,%2.8h"
8516 : "=w"(result)
8517 : "w"(a), "w"(b)
8518 : /* No clobbers */);
8519 return result;
8522 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8523 vmull_high_u32 (uint32x4_t a, uint32x4_t b)
8525 uint64x2_t result;
8526 __asm__ ("umull2 %0.2d,%1.4s,%2.4s"
8527 : "=w"(result)
8528 : "w"(a), "w"(b)
8529 : /* No clobbers */);
8530 return result;
/* vmull_lane_{s16,s32,u16,u32}: widening multiply of A against lane C
   of B, via SMULL/UMULL.  Macros because the lane index C must be an
   immediate.  */

#define vmull_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d,%1.2s,%2.s[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                               \
       __asm__ ("umull %0.4s,%1.4h,%2.h[%3]"                            \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                               \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* Widening multiply-long by a single lane of a 128-bit ("q") vector.
   Same as vmull_lane_* but B is a full quad register, giving the lane
   index a wider valid range.  C must be an immediate; 16-bit element
   forms need B in V0-V15 ("x" constraint).  */

#define vmull_laneq_s16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int32x4_t result;                                                \
       __asm__ ("smull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_s32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int64x2_t result;                                                \
       __asm__ ("smull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u16(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint32x4_t result;                                                \
       __asm__ ("umull %0.4s, %1.4h, %2.h[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "x"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vmull_laneq_u32(a, b, c)                                        \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint64x2_t result;                                                \
       __asm__ ("umull %0.2d, %1.2s, %2.s[%3]"                          \
                : "=w"(result)                                          \
                : "w"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
8637 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8638 vmull_n_s16 (int16x4_t a, int16_t b)
8640 int32x4_t result;
8641 __asm__ ("smull %0.4s,%1.4h,%2.h[0]"
8642 : "=w"(result)
8643 : "w"(a), "x"(b)
8644 : /* No clobbers */);
8645 return result;
8648 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8649 vmull_n_s32 (int32x2_t a, int32_t b)
8651 int64x2_t result;
8652 __asm__ ("smull %0.2d,%1.2s,%2.s[0]"
8653 : "=w"(result)
8654 : "w"(a), "w"(b)
8655 : /* No clobbers */);
8656 return result;
8659 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8660 vmull_n_u16 (uint16x4_t a, uint16_t b)
8662 uint32x4_t result;
8663 __asm__ ("umull %0.4s,%1.4h,%2.h[0]"
8664 : "=w"(result)
8665 : "w"(a), "x"(b)
8666 : /* No clobbers */);
8667 return result;
8670 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8671 vmull_n_u32 (uint32x2_t a, uint32_t b)
8673 uint64x2_t result;
8674 __asm__ ("umull %0.2d,%1.2s,%2.s[0]"
8675 : "=w"(result)
8676 : "w"(a), "w"(b)
8677 : /* No clobbers */);
8678 return result;
8681 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
8682 vmull_p8 (poly8x8_t a, poly8x8_t b)
8684 poly16x8_t result;
8685 __asm__ ("pmull %0.8h, %1.8b, %2.8b"
8686 : "=w"(result)
8687 : "w"(a), "w"(b)
8688 : /* No clobbers */);
8689 return result;
8692 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8693 vmull_s8 (int8x8_t a, int8x8_t b)
8695 int16x8_t result;
8696 __asm__ ("smull %0.8h, %1.8b, %2.8b"
8697 : "=w"(result)
8698 : "w"(a), "w"(b)
8699 : /* No clobbers */);
8700 return result;
8703 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8704 vmull_s16 (int16x4_t a, int16x4_t b)
8706 int32x4_t result;
8707 __asm__ ("smull %0.4s, %1.4h, %2.4h"
8708 : "=w"(result)
8709 : "w"(a), "w"(b)
8710 : /* No clobbers */);
8711 return result;
8714 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
8715 vmull_s32 (int32x2_t a, int32x2_t b)
8717 int64x2_t result;
8718 __asm__ ("smull %0.2d, %1.2s, %2.2s"
8719 : "=w"(result)
8720 : "w"(a), "w"(b)
8721 : /* No clobbers */);
8722 return result;
8725 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8726 vmull_u8 (uint8x8_t a, uint8x8_t b)
8728 uint16x8_t result;
8729 __asm__ ("umull %0.8h, %1.8b, %2.8b"
8730 : "=w"(result)
8731 : "w"(a), "w"(b)
8732 : /* No clobbers */);
8733 return result;
8736 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8737 vmull_u16 (uint16x4_t a, uint16x4_t b)
8739 uint32x4_t result;
8740 __asm__ ("umull %0.4s, %1.4h, %2.4h"
8741 : "=w"(result)
8742 : "w"(a), "w"(b)
8743 : /* No clobbers */);
8744 return result;
8747 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
8748 vmull_u32 (uint32x2_t a, uint32x2_t b)
8750 uint64x2_t result;
8751 __asm__ ("umull %0.2d, %1.2s, %2.2s"
8752 : "=w"(result)
8753 : "w"(a), "w"(b)
8754 : /* No clobbers */);
8755 return result;
8758 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8759 vmulq_n_f32 (float32x4_t a, float32_t b)
8761 float32x4_t result;
8762 __asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
8763 : "=w"(result)
8764 : "w"(a), "w"(b)
8765 : /* No clobbers */);
8766 return result;
8769 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8770 vmulq_n_f64 (float64x2_t a, float64_t b)
8772 float64x2_t result;
8773 __asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
8774 : "=w"(result)
8775 : "w"(a), "w"(b)
8776 : /* No clobbers */);
8777 return result;
8780 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
8781 vmulq_n_s16 (int16x8_t a, int16_t b)
8783 int16x8_t result;
8784 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8785 : "=w"(result)
8786 : "w"(a), "x"(b)
8787 : /* No clobbers */);
8788 return result;
8791 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
8792 vmulq_n_s32 (int32x4_t a, int32_t b)
8794 int32x4_t result;
8795 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8796 : "=w"(result)
8797 : "w"(a), "w"(b)
8798 : /* No clobbers */);
8799 return result;
8802 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
8803 vmulq_n_u16 (uint16x8_t a, uint16_t b)
8805 uint16x8_t result;
8806 __asm__ ("mul %0.8h,%1.8h,%2.h[0]"
8807 : "=w"(result)
8808 : "w"(a), "x"(b)
8809 : /* No clobbers */);
8810 return result;
8813 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
8814 vmulq_n_u32 (uint32x4_t a, uint32_t b)
8816 uint32x4_t result;
8817 __asm__ ("mul %0.4s,%1.4s,%2.s[0]"
8818 : "=w"(result)
8819 : "w"(a), "w"(b)
8820 : /* No clobbers */);
8821 return result;
8824 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
8825 vmulx_f32 (float32x2_t a, float32x2_t b)
8827 float32x2_t result;
8828 __asm__ ("fmulx %0.2s,%1.2s,%2.2s"
8829 : "=w"(result)
8830 : "w"(a), "w"(b)
8831 : /* No clobbers */);
8832 return result;
8835 #define vmulx_lane_f32(a, b, c) \
8836 __extension__ \
8837 ({ \
8838 float32x4_t b_ = (b); \
8839 float32x2_t a_ = (a); \
8840 float32x2_t result; \
8841 __asm__ ("fmulx %0.2s,%1.2s,%2.s[%3]" \
8842 : "=w"(result) \
8843 : "w"(a_), "w"(b_), "i"(c) \
8844 : /* No clobbers */); \
8845 result; \
8848 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
8849 vmulxd_f64 (float64_t a, float64_t b)
8851 float64_t result;
8852 __asm__ ("fmulx %d0, %d1, %d2"
8853 : "=w"(result)
8854 : "w"(a), "w"(b)
8855 : /* No clobbers */);
8856 return result;
8859 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
8860 vmulxq_f32 (float32x4_t a, float32x4_t b)
8862 float32x4_t result;
8863 __asm__ ("fmulx %0.4s,%1.4s,%2.4s"
8864 : "=w"(result)
8865 : "w"(a), "w"(b)
8866 : /* No clobbers */);
8867 return result;
8870 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
8871 vmulxq_f64 (float64x2_t a, float64x2_t b)
8873 float64x2_t result;
8874 __asm__ ("fmulx %0.2d,%1.2d,%2.2d"
8875 : "=w"(result)
8876 : "w"(a), "w"(b)
8877 : /* No clobbers */);
8878 return result;
8881 #define vmulxq_lane_f32(a, b, c) \
8882 __extension__ \
8883 ({ \
8884 float32x4_t b_ = (b); \
8885 float32x4_t a_ = (a); \
8886 float32x4_t result; \
8887 __asm__ ("fmulx %0.4s,%1.4s,%2.s[%3]" \
8888 : "=w"(result) \
8889 : "w"(a_), "w"(b_), "i"(c) \
8890 : /* No clobbers */); \
8891 result; \
8894 #define vmulxq_lane_f64(a, b, c) \
8895 __extension__ \
8896 ({ \
8897 float64x2_t b_ = (b); \
8898 float64x2_t a_ = (a); \
8899 float64x2_t result; \
8900 __asm__ ("fmulx %0.2d,%1.2d,%2.d[%3]" \
8901 : "=w"(result) \
8902 : "w"(a_), "w"(b_), "i"(c) \
8903 : /* No clobbers */); \
8904 result; \
8907 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
8908 vmulxs_f32 (float32_t a, float32_t b)
8910 float32_t result;
8911 __asm__ ("fmulx %s0, %s1, %s2"
8912 : "=w"(result)
8913 : "w"(a), "w"(b)
8914 : /* No clobbers */);
8915 return result;
8918 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
8919 vmvn_p8 (poly8x8_t a)
8921 poly8x8_t result;
8922 __asm__ ("mvn %0.8b,%1.8b"
8923 : "=w"(result)
8924 : "w"(a)
8925 : /* No clobbers */);
8926 return result;
8929 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
8930 vmvn_s8 (int8x8_t a)
8932 int8x8_t result;
8933 __asm__ ("mvn %0.8b,%1.8b"
8934 : "=w"(result)
8935 : "w"(a)
8936 : /* No clobbers */);
8937 return result;
8940 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
8941 vmvn_s16 (int16x4_t a)
8943 int16x4_t result;
8944 __asm__ ("mvn %0.8b,%1.8b"
8945 : "=w"(result)
8946 : "w"(a)
8947 : /* No clobbers */);
8948 return result;
8951 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
8952 vmvn_s32 (int32x2_t a)
8954 int32x2_t result;
8955 __asm__ ("mvn %0.8b,%1.8b"
8956 : "=w"(result)
8957 : "w"(a)
8958 : /* No clobbers */);
8959 return result;
8962 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
8963 vmvn_u8 (uint8x8_t a)
8965 uint8x8_t result;
8966 __asm__ ("mvn %0.8b,%1.8b"
8967 : "=w"(result)
8968 : "w"(a)
8969 : /* No clobbers */);
8970 return result;
8973 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
8974 vmvn_u16 (uint16x4_t a)
8976 uint16x4_t result;
8977 __asm__ ("mvn %0.8b,%1.8b"
8978 : "=w"(result)
8979 : "w"(a)
8980 : /* No clobbers */);
8981 return result;
8984 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
8985 vmvn_u32 (uint32x2_t a)
8987 uint32x2_t result;
8988 __asm__ ("mvn %0.8b,%1.8b"
8989 : "=w"(result)
8990 : "w"(a)
8991 : /* No clobbers */);
8992 return result;
8995 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
8996 vmvnq_p8 (poly8x16_t a)
8998 poly8x16_t result;
8999 __asm__ ("mvn %0.16b,%1.16b"
9000 : "=w"(result)
9001 : "w"(a)
9002 : /* No clobbers */);
9003 return result;
9006 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9007 vmvnq_s8 (int8x16_t a)
9009 int8x16_t result;
9010 __asm__ ("mvn %0.16b,%1.16b"
9011 : "=w"(result)
9012 : "w"(a)
9013 : /* No clobbers */);
9014 return result;
9017 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9018 vmvnq_s16 (int16x8_t a)
9020 int16x8_t result;
9021 __asm__ ("mvn %0.16b,%1.16b"
9022 : "=w"(result)
9023 : "w"(a)
9024 : /* No clobbers */);
9025 return result;
9028 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9029 vmvnq_s32 (int32x4_t a)
9031 int32x4_t result;
9032 __asm__ ("mvn %0.16b,%1.16b"
9033 : "=w"(result)
9034 : "w"(a)
9035 : /* No clobbers */);
9036 return result;
9039 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9040 vmvnq_u8 (uint8x16_t a)
9042 uint8x16_t result;
9043 __asm__ ("mvn %0.16b,%1.16b"
9044 : "=w"(result)
9045 : "w"(a)
9046 : /* No clobbers */);
9047 return result;
9050 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9051 vmvnq_u16 (uint16x8_t a)
9053 uint16x8_t result;
9054 __asm__ ("mvn %0.16b,%1.16b"
9055 : "=w"(result)
9056 : "w"(a)
9057 : /* No clobbers */);
9058 return result;
9061 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9062 vmvnq_u32 (uint32x4_t a)
9064 uint32x4_t result;
9065 __asm__ ("mvn %0.16b,%1.16b"
9066 : "=w"(result)
9067 : "w"(a)
9068 : /* No clobbers */);
9069 return result;
9073 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9074 vpadal_s8 (int16x4_t a, int8x8_t b)
9076 int16x4_t result;
9077 __asm__ ("sadalp %0.4h,%2.8b"
9078 : "=w"(result)
9079 : "0"(a), "w"(b)
9080 : /* No clobbers */);
9081 return result;
9084 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9085 vpadal_s16 (int32x2_t a, int16x4_t b)
9087 int32x2_t result;
9088 __asm__ ("sadalp %0.2s,%2.4h"
9089 : "=w"(result)
9090 : "0"(a), "w"(b)
9091 : /* No clobbers */);
9092 return result;
9095 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9096 vpadal_s32 (int64x1_t a, int32x2_t b)
9098 int64x1_t result;
9099 __asm__ ("sadalp %0.1d,%2.2s"
9100 : "=w"(result)
9101 : "0"(a), "w"(b)
9102 : /* No clobbers */);
9103 return result;
9106 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9107 vpadal_u8 (uint16x4_t a, uint8x8_t b)
9109 uint16x4_t result;
9110 __asm__ ("uadalp %0.4h,%2.8b"
9111 : "=w"(result)
9112 : "0"(a), "w"(b)
9113 : /* No clobbers */);
9114 return result;
9117 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9118 vpadal_u16 (uint32x2_t a, uint16x4_t b)
9120 uint32x2_t result;
9121 __asm__ ("uadalp %0.2s,%2.4h"
9122 : "=w"(result)
9123 : "0"(a), "w"(b)
9124 : /* No clobbers */);
9125 return result;
9128 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9129 vpadal_u32 (uint64x1_t a, uint32x2_t b)
9131 uint64x1_t result;
9132 __asm__ ("uadalp %0.1d,%2.2s"
9133 : "=w"(result)
9134 : "0"(a), "w"(b)
9135 : /* No clobbers */);
9136 return result;
9139 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9140 vpadalq_s8 (int16x8_t a, int8x16_t b)
9142 int16x8_t result;
9143 __asm__ ("sadalp %0.8h,%2.16b"
9144 : "=w"(result)
9145 : "0"(a), "w"(b)
9146 : /* No clobbers */);
9147 return result;
9150 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9151 vpadalq_s16 (int32x4_t a, int16x8_t b)
9153 int32x4_t result;
9154 __asm__ ("sadalp %0.4s,%2.8h"
9155 : "=w"(result)
9156 : "0"(a), "w"(b)
9157 : /* No clobbers */);
9158 return result;
9161 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9162 vpadalq_s32 (int64x2_t a, int32x4_t b)
9164 int64x2_t result;
9165 __asm__ ("sadalp %0.2d,%2.4s"
9166 : "=w"(result)
9167 : "0"(a), "w"(b)
9168 : /* No clobbers */);
9169 return result;
9172 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9173 vpadalq_u8 (uint16x8_t a, uint8x16_t b)
9175 uint16x8_t result;
9176 __asm__ ("uadalp %0.8h,%2.16b"
9177 : "=w"(result)
9178 : "0"(a), "w"(b)
9179 : /* No clobbers */);
9180 return result;
9183 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9184 vpadalq_u16 (uint32x4_t a, uint16x8_t b)
9186 uint32x4_t result;
9187 __asm__ ("uadalp %0.4s,%2.8h"
9188 : "=w"(result)
9189 : "0"(a), "w"(b)
9190 : /* No clobbers */);
9191 return result;
9194 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9195 vpadalq_u32 (uint64x2_t a, uint32x4_t b)
9197 uint64x2_t result;
9198 __asm__ ("uadalp %0.2d,%2.4s"
9199 : "=w"(result)
9200 : "0"(a), "w"(b)
9201 : /* No clobbers */);
9202 return result;
9205 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9206 vpadd_f32 (float32x2_t a, float32x2_t b)
9208 float32x2_t result;
9209 __asm__ ("faddp %0.2s,%1.2s,%2.2s"
9210 : "=w"(result)
9211 : "w"(a), "w"(b)
9212 : /* No clobbers */);
9213 return result;
9216 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9217 vpaddl_s8 (int8x8_t a)
9219 int16x4_t result;
9220 __asm__ ("saddlp %0.4h,%1.8b"
9221 : "=w"(result)
9222 : "w"(a)
9223 : /* No clobbers */);
9224 return result;
9227 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9228 vpaddl_s16 (int16x4_t a)
9230 int32x2_t result;
9231 __asm__ ("saddlp %0.2s,%1.4h"
9232 : "=w"(result)
9233 : "w"(a)
9234 : /* No clobbers */);
9235 return result;
9238 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
9239 vpaddl_s32 (int32x2_t a)
9241 int64x1_t result;
9242 __asm__ ("saddlp %0.1d,%1.2s"
9243 : "=w"(result)
9244 : "w"(a)
9245 : /* No clobbers */);
9246 return result;
9249 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9250 vpaddl_u8 (uint8x8_t a)
9252 uint16x4_t result;
9253 __asm__ ("uaddlp %0.4h,%1.8b"
9254 : "=w"(result)
9255 : "w"(a)
9256 : /* No clobbers */);
9257 return result;
9260 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9261 vpaddl_u16 (uint16x4_t a)
9263 uint32x2_t result;
9264 __asm__ ("uaddlp %0.2s,%1.4h"
9265 : "=w"(result)
9266 : "w"(a)
9267 : /* No clobbers */);
9268 return result;
9271 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
9272 vpaddl_u32 (uint32x2_t a)
9274 uint64x1_t result;
9275 __asm__ ("uaddlp %0.1d,%1.2s"
9276 : "=w"(result)
9277 : "w"(a)
9278 : /* No clobbers */);
9279 return result;
9282 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9283 vpaddlq_s8 (int8x16_t a)
9285 int16x8_t result;
9286 __asm__ ("saddlp %0.8h,%1.16b"
9287 : "=w"(result)
9288 : "w"(a)
9289 : /* No clobbers */);
9290 return result;
9293 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9294 vpaddlq_s16 (int16x8_t a)
9296 int32x4_t result;
9297 __asm__ ("saddlp %0.4s,%1.8h"
9298 : "=w"(result)
9299 : "w"(a)
9300 : /* No clobbers */);
9301 return result;
9304 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9305 vpaddlq_s32 (int32x4_t a)
9307 int64x2_t result;
9308 __asm__ ("saddlp %0.2d,%1.4s"
9309 : "=w"(result)
9310 : "w"(a)
9311 : /* No clobbers */);
9312 return result;
9315 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9316 vpaddlq_u8 (uint8x16_t a)
9318 uint16x8_t result;
9319 __asm__ ("uaddlp %0.8h,%1.16b"
9320 : "=w"(result)
9321 : "w"(a)
9322 : /* No clobbers */);
9323 return result;
9326 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9327 vpaddlq_u16 (uint16x8_t a)
9329 uint32x4_t result;
9330 __asm__ ("uaddlp %0.4s,%1.8h"
9331 : "=w"(result)
9332 : "w"(a)
9333 : /* No clobbers */);
9334 return result;
9337 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9338 vpaddlq_u32 (uint32x4_t a)
9340 uint64x2_t result;
9341 __asm__ ("uaddlp %0.2d,%1.4s"
9342 : "=w"(result)
9343 : "w"(a)
9344 : /* No clobbers */);
9345 return result;
9348 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9349 vpaddq_f32 (float32x4_t a, float32x4_t b)
9351 float32x4_t result;
9352 __asm__ ("faddp %0.4s,%1.4s,%2.4s"
9353 : "=w"(result)
9354 : "w"(a), "w"(b)
9355 : /* No clobbers */);
9356 return result;
9359 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9360 vpaddq_f64 (float64x2_t a, float64x2_t b)
9362 float64x2_t result;
9363 __asm__ ("faddp %0.2d,%1.2d,%2.2d"
9364 : "=w"(result)
9365 : "w"(a), "w"(b)
9366 : /* No clobbers */);
9367 return result;
9370 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9371 vpaddq_s8 (int8x16_t a, int8x16_t b)
9373 int8x16_t result;
9374 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9375 : "=w"(result)
9376 : "w"(a), "w"(b)
9377 : /* No clobbers */);
9378 return result;
9381 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9382 vpaddq_s16 (int16x8_t a, int16x8_t b)
9384 int16x8_t result;
9385 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9386 : "=w"(result)
9387 : "w"(a), "w"(b)
9388 : /* No clobbers */);
9389 return result;
9392 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9393 vpaddq_s32 (int32x4_t a, int32x4_t b)
9395 int32x4_t result;
9396 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9397 : "=w"(result)
9398 : "w"(a), "w"(b)
9399 : /* No clobbers */);
9400 return result;
9403 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
9404 vpaddq_s64 (int64x2_t a, int64x2_t b)
9406 int64x2_t result;
9407 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9408 : "=w"(result)
9409 : "w"(a), "w"(b)
9410 : /* No clobbers */);
9411 return result;
9414 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9415 vpaddq_u8 (uint8x16_t a, uint8x16_t b)
9417 uint8x16_t result;
9418 __asm__ ("addp %0.16b,%1.16b,%2.16b"
9419 : "=w"(result)
9420 : "w"(a), "w"(b)
9421 : /* No clobbers */);
9422 return result;
9425 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9426 vpaddq_u16 (uint16x8_t a, uint16x8_t b)
9428 uint16x8_t result;
9429 __asm__ ("addp %0.8h,%1.8h,%2.8h"
9430 : "=w"(result)
9431 : "w"(a), "w"(b)
9432 : /* No clobbers */);
9433 return result;
9436 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9437 vpaddq_u32 (uint32x4_t a, uint32x4_t b)
9439 uint32x4_t result;
9440 __asm__ ("addp %0.4s,%1.4s,%2.4s"
9441 : "=w"(result)
9442 : "w"(a), "w"(b)
9443 : /* No clobbers */);
9444 return result;
9447 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
9448 vpaddq_u64 (uint64x2_t a, uint64x2_t b)
9450 uint64x2_t result;
9451 __asm__ ("addp %0.2d,%1.2d,%2.2d"
9452 : "=w"(result)
9453 : "w"(a), "w"(b)
9454 : /* No clobbers */);
9455 return result;
9458 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9459 vpadds_f32 (float32x2_t a)
9461 float32_t result;
9462 __asm__ ("faddp %s0,%1.2s"
9463 : "=w"(result)
9464 : "w"(a)
9465 : /* No clobbers */);
9466 return result;
9469 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9470 vpmax_f32 (float32x2_t a, float32x2_t b)
9472 float32x2_t result;
9473 __asm__ ("fmaxp %0.2s, %1.2s, %2.2s"
9474 : "=w"(result)
9475 : "w"(a), "w"(b)
9476 : /* No clobbers */);
9477 return result;
9480 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9481 vpmax_s8 (int8x8_t a, int8x8_t b)
9483 int8x8_t result;
9484 __asm__ ("smaxp %0.8b, %1.8b, %2.8b"
9485 : "=w"(result)
9486 : "w"(a), "w"(b)
9487 : /* No clobbers */);
9488 return result;
9491 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9492 vpmax_s16 (int16x4_t a, int16x4_t b)
9494 int16x4_t result;
9495 __asm__ ("smaxp %0.4h, %1.4h, %2.4h"
9496 : "=w"(result)
9497 : "w"(a), "w"(b)
9498 : /* No clobbers */);
9499 return result;
9502 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9503 vpmax_s32 (int32x2_t a, int32x2_t b)
9505 int32x2_t result;
9506 __asm__ ("smaxp %0.2s, %1.2s, %2.2s"
9507 : "=w"(result)
9508 : "w"(a), "w"(b)
9509 : /* No clobbers */);
9510 return result;
9513 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9514 vpmax_u8 (uint8x8_t a, uint8x8_t b)
9516 uint8x8_t result;
9517 __asm__ ("umaxp %0.8b, %1.8b, %2.8b"
9518 : "=w"(result)
9519 : "w"(a), "w"(b)
9520 : /* No clobbers */);
9521 return result;
9524 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9525 vpmax_u16 (uint16x4_t a, uint16x4_t b)
9527 uint16x4_t result;
9528 __asm__ ("umaxp %0.4h, %1.4h, %2.4h"
9529 : "=w"(result)
9530 : "w"(a), "w"(b)
9531 : /* No clobbers */);
9532 return result;
9535 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9536 vpmax_u32 (uint32x2_t a, uint32x2_t b)
9538 uint32x2_t result;
9539 __asm__ ("umaxp %0.2s, %1.2s, %2.2s"
9540 : "=w"(result)
9541 : "w"(a), "w"(b)
9542 : /* No clobbers */);
9543 return result;
9546 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9547 vpmaxnm_f32 (float32x2_t a, float32x2_t b)
9549 float32x2_t result;
9550 __asm__ ("fmaxnmp %0.2s,%1.2s,%2.2s"
9551 : "=w"(result)
9552 : "w"(a), "w"(b)
9553 : /* No clobbers */);
9554 return result;
9557 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9558 vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
9560 float32x4_t result;
9561 __asm__ ("fmaxnmp %0.4s,%1.4s,%2.4s"
9562 : "=w"(result)
9563 : "w"(a), "w"(b)
9564 : /* No clobbers */);
9565 return result;
9568 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9569 vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
9571 float64x2_t result;
9572 __asm__ ("fmaxnmp %0.2d,%1.2d,%2.2d"
9573 : "=w"(result)
9574 : "w"(a), "w"(b)
9575 : /* No clobbers */);
9576 return result;
9579 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9580 vpmaxnmqd_f64 (float64x2_t a)
9582 float64_t result;
9583 __asm__ ("fmaxnmp %d0,%1.2d"
9584 : "=w"(result)
9585 : "w"(a)
9586 : /* No clobbers */);
9587 return result;
9590 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9591 vpmaxnms_f32 (float32x2_t a)
9593 float32_t result;
9594 __asm__ ("fmaxnmp %s0,%1.2s"
9595 : "=w"(result)
9596 : "w"(a)
9597 : /* No clobbers */);
9598 return result;
9601 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9602 vpmaxq_f32 (float32x4_t a, float32x4_t b)
9604 float32x4_t result;
9605 __asm__ ("fmaxp %0.4s, %1.4s, %2.4s"
9606 : "=w"(result)
9607 : "w"(a), "w"(b)
9608 : /* No clobbers */);
9609 return result;
9612 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9613 vpmaxq_f64 (float64x2_t a, float64x2_t b)
9615 float64x2_t result;
9616 __asm__ ("fmaxp %0.2d, %1.2d, %2.2d"
9617 : "=w"(result)
9618 : "w"(a), "w"(b)
9619 : /* No clobbers */);
9620 return result;
9623 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9624 vpmaxq_s8 (int8x16_t a, int8x16_t b)
9626 int8x16_t result;
9627 __asm__ ("smaxp %0.16b, %1.16b, %2.16b"
9628 : "=w"(result)
9629 : "w"(a), "w"(b)
9630 : /* No clobbers */);
9631 return result;
9634 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9635 vpmaxq_s16 (int16x8_t a, int16x8_t b)
9637 int16x8_t result;
9638 __asm__ ("smaxp %0.8h, %1.8h, %2.8h"
9639 : "=w"(result)
9640 : "w"(a), "w"(b)
9641 : /* No clobbers */);
9642 return result;
9645 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9646 vpmaxq_s32 (int32x4_t a, int32x4_t b)
9648 int32x4_t result;
9649 __asm__ ("smaxp %0.4s, %1.4s, %2.4s"
9650 : "=w"(result)
9651 : "w"(a), "w"(b)
9652 : /* No clobbers */);
9653 return result;
9656 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9657 vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
9659 uint8x16_t result;
9660 __asm__ ("umaxp %0.16b, %1.16b, %2.16b"
9661 : "=w"(result)
9662 : "w"(a), "w"(b)
9663 : /* No clobbers */);
9664 return result;
9667 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9668 vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
9670 uint16x8_t result;
9671 __asm__ ("umaxp %0.8h, %1.8h, %2.8h"
9672 : "=w"(result)
9673 : "w"(a), "w"(b)
9674 : /* No clobbers */);
9675 return result;
9678 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9679 vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
9681 uint32x4_t result;
9682 __asm__ ("umaxp %0.4s, %1.4s, %2.4s"
9683 : "=w"(result)
9684 : "w"(a), "w"(b)
9685 : /* No clobbers */);
9686 return result;
9689 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9690 vpmaxqd_f64 (float64x2_t a)
9692 float64_t result;
9693 __asm__ ("fmaxp %d0,%1.2d"
9694 : "=w"(result)
9695 : "w"(a)
9696 : /* No clobbers */);
9697 return result;
9700 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9701 vpmaxs_f32 (float32x2_t a)
9703 float32_t result;
9704 __asm__ ("fmaxp %s0,%1.2s"
9705 : "=w"(result)
9706 : "w"(a)
9707 : /* No clobbers */);
9708 return result;
9711 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9712 vpmin_f32 (float32x2_t a, float32x2_t b)
9714 float32x2_t result;
9715 __asm__ ("fminp %0.2s, %1.2s, %2.2s"
9716 : "=w"(result)
9717 : "w"(a), "w"(b)
9718 : /* No clobbers */);
9719 return result;
9722 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
9723 vpmin_s8 (int8x8_t a, int8x8_t b)
9725 int8x8_t result;
9726 __asm__ ("sminp %0.8b, %1.8b, %2.8b"
9727 : "=w"(result)
9728 : "w"(a), "w"(b)
9729 : /* No clobbers */);
9730 return result;
9733 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9734 vpmin_s16 (int16x4_t a, int16x4_t b)
9736 int16x4_t result;
9737 __asm__ ("sminp %0.4h, %1.4h, %2.4h"
9738 : "=w"(result)
9739 : "w"(a), "w"(b)
9740 : /* No clobbers */);
9741 return result;
9744 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9745 vpmin_s32 (int32x2_t a, int32x2_t b)
9747 int32x2_t result;
9748 __asm__ ("sminp %0.2s, %1.2s, %2.2s"
9749 : "=w"(result)
9750 : "w"(a), "w"(b)
9751 : /* No clobbers */);
9752 return result;
9755 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
9756 vpmin_u8 (uint8x8_t a, uint8x8_t b)
9758 uint8x8_t result;
9759 __asm__ ("uminp %0.8b, %1.8b, %2.8b"
9760 : "=w"(result)
9761 : "w"(a), "w"(b)
9762 : /* No clobbers */);
9763 return result;
9766 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
9767 vpmin_u16 (uint16x4_t a, uint16x4_t b)
9769 uint16x4_t result;
9770 __asm__ ("uminp %0.4h, %1.4h, %2.4h"
9771 : "=w"(result)
9772 : "w"(a), "w"(b)
9773 : /* No clobbers */);
9774 return result;
9777 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
9778 vpmin_u32 (uint32x2_t a, uint32x2_t b)
9780 uint32x2_t result;
9781 __asm__ ("uminp %0.2s, %1.2s, %2.2s"
9782 : "=w"(result)
9783 : "w"(a), "w"(b)
9784 : /* No clobbers */);
9785 return result;
9788 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
9789 vpminnm_f32 (float32x2_t a, float32x2_t b)
9791 float32x2_t result;
9792 __asm__ ("fminnmp %0.2s,%1.2s,%2.2s"
9793 : "=w"(result)
9794 : "w"(a), "w"(b)
9795 : /* No clobbers */);
9796 return result;
9799 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9800 vpminnmq_f32 (float32x4_t a, float32x4_t b)
9802 float32x4_t result;
9803 __asm__ ("fminnmp %0.4s,%1.4s,%2.4s"
9804 : "=w"(result)
9805 : "w"(a), "w"(b)
9806 : /* No clobbers */);
9807 return result;
9810 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9811 vpminnmq_f64 (float64x2_t a, float64x2_t b)
9813 float64x2_t result;
9814 __asm__ ("fminnmp %0.2d,%1.2d,%2.2d"
9815 : "=w"(result)
9816 : "w"(a), "w"(b)
9817 : /* No clobbers */);
9818 return result;
9821 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9822 vpminnmqd_f64 (float64x2_t a)
9824 float64_t result;
9825 __asm__ ("fminnmp %d0,%1.2d"
9826 : "=w"(result)
9827 : "w"(a)
9828 : /* No clobbers */);
9829 return result;
9832 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9833 vpminnms_f32 (float32x2_t a)
9835 float32_t result;
9836 __asm__ ("fminnmp %s0,%1.2s"
9837 : "=w"(result)
9838 : "w"(a)
9839 : /* No clobbers */);
9840 return result;
9843 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
9844 vpminq_f32 (float32x4_t a, float32x4_t b)
9846 float32x4_t result;
9847 __asm__ ("fminp %0.4s, %1.4s, %2.4s"
9848 : "=w"(result)
9849 : "w"(a), "w"(b)
9850 : /* No clobbers */);
9851 return result;
9854 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
9855 vpminq_f64 (float64x2_t a, float64x2_t b)
9857 float64x2_t result;
9858 __asm__ ("fminp %0.2d, %1.2d, %2.2d"
9859 : "=w"(result)
9860 : "w"(a), "w"(b)
9861 : /* No clobbers */);
9862 return result;
9865 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9866 vpminq_s8 (int8x16_t a, int8x16_t b)
9868 int8x16_t result;
9869 __asm__ ("sminp %0.16b, %1.16b, %2.16b"
9870 : "=w"(result)
9871 : "w"(a), "w"(b)
9872 : /* No clobbers */);
9873 return result;
9876 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9877 vpminq_s16 (int16x8_t a, int16x8_t b)
9879 int16x8_t result;
9880 __asm__ ("sminp %0.8h, %1.8h, %2.8h"
9881 : "=w"(result)
9882 : "w"(a), "w"(b)
9883 : /* No clobbers */);
9884 return result;
9887 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9888 vpminq_s32 (int32x4_t a, int32x4_t b)
9890 int32x4_t result;
9891 __asm__ ("sminp %0.4s, %1.4s, %2.4s"
9892 : "=w"(result)
9893 : "w"(a), "w"(b)
9894 : /* No clobbers */);
9895 return result;
9898 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
9899 vpminq_u8 (uint8x16_t a, uint8x16_t b)
9901 uint8x16_t result;
9902 __asm__ ("uminp %0.16b, %1.16b, %2.16b"
9903 : "=w"(result)
9904 : "w"(a), "w"(b)
9905 : /* No clobbers */);
9906 return result;
9909 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
9910 vpminq_u16 (uint16x8_t a, uint16x8_t b)
9912 uint16x8_t result;
9913 __asm__ ("uminp %0.8h, %1.8h, %2.8h"
9914 : "=w"(result)
9915 : "w"(a), "w"(b)
9916 : /* No clobbers */);
9917 return result;
9920 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
9921 vpminq_u32 (uint32x4_t a, uint32x4_t b)
9923 uint32x4_t result;
9924 __asm__ ("uminp %0.4s, %1.4s, %2.4s"
9925 : "=w"(result)
9926 : "w"(a), "w"(b)
9927 : /* No clobbers */);
9928 return result;
9931 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
9932 vpminqd_f64 (float64x2_t a)
9934 float64_t result;
9935 __asm__ ("fminp %d0,%1.2d"
9936 : "=w"(result)
9937 : "w"(a)
9938 : /* No clobbers */);
9939 return result;
9942 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
9943 vpmins_f32 (float32x2_t a)
9945 float32_t result;
9946 __asm__ ("fminp %s0,%1.2s"
9947 : "=w"(result)
9948 : "w"(a)
9949 : /* No clobbers */);
9950 return result;
9953 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
9954 vqdmulh_n_s16 (int16x4_t a, int16_t b)
9956 int16x4_t result;
9957 __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]"
9958 : "=w"(result)
9959 : "w"(a), "x"(b)
9960 : /* No clobbers */);
9961 return result;
9964 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
9965 vqdmulh_n_s32 (int32x2_t a, int32_t b)
9967 int32x2_t result;
9968 __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]"
9969 : "=w"(result)
9970 : "w"(a), "w"(b)
9971 : /* No clobbers */);
9972 return result;
9975 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
9976 vqdmulhq_n_s16 (int16x8_t a, int16_t b)
9978 int16x8_t result;
9979 __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]"
9980 : "=w"(result)
9981 : "w"(a), "x"(b)
9982 : /* No clobbers */);
9983 return result;
9986 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
9987 vqdmulhq_n_s32 (int32x4_t a, int32_t b)
9989 int32x4_t result;
9990 __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]"
9991 : "=w"(result)
9992 : "w"(a), "w"(b)
9993 : /* No clobbers */);
9994 return result;
9997 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
9998 vqmovn_high_s16 (int8x8_t a, int16x8_t b)
10000 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10001 __asm__ ("sqxtn2 %0.16b, %1.8h"
10002 : "+w"(result)
10003 : "w"(b)
10004 : /* No clobbers */);
10005 return result;
10008 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10009 vqmovn_high_s32 (int16x4_t a, int32x4_t b)
10011 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10012 __asm__ ("sqxtn2 %0.8h, %1.4s"
10013 : "+w"(result)
10014 : "w"(b)
10015 : /* No clobbers */);
10016 return result;
10019 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10020 vqmovn_high_s64 (int32x2_t a, int64x2_t b)
10022 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10023 __asm__ ("sqxtn2 %0.4s, %1.2d"
10024 : "+w"(result)
10025 : "w"(b)
10026 : /* No clobbers */);
10027 return result;
10030 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10031 vqmovn_high_u16 (uint8x8_t a, uint16x8_t b)
10033 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10034 __asm__ ("uqxtn2 %0.16b, %1.8h"
10035 : "+w"(result)
10036 : "w"(b)
10037 : /* No clobbers */);
10038 return result;
10041 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10042 vqmovn_high_u32 (uint16x4_t a, uint32x4_t b)
10044 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10045 __asm__ ("uqxtn2 %0.8h, %1.4s"
10046 : "+w"(result)
10047 : "w"(b)
10048 : /* No clobbers */);
10049 return result;
10052 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10053 vqmovn_high_u64 (uint32x2_t a, uint64x2_t b)
10055 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10056 __asm__ ("uqxtn2 %0.4s, %1.2d"
10057 : "+w"(result)
10058 : "w"(b)
10059 : /* No clobbers */);
10060 return result;
10063 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10064 vqmovun_high_s16 (uint8x8_t a, int16x8_t b)
10066 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10067 __asm__ ("sqxtun2 %0.16b, %1.8h"
10068 : "+w"(result)
10069 : "w"(b)
10070 : /* No clobbers */);
10071 return result;
10074 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10075 vqmovun_high_s32 (uint16x4_t a, int32x4_t b)
10077 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10078 __asm__ ("sqxtun2 %0.8h, %1.4s"
10079 : "+w"(result)
10080 : "w"(b)
10081 : /* No clobbers */);
10082 return result;
10085 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10086 vqmovun_high_s64 (uint32x2_t a, int64x2_t b)
10088 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10089 __asm__ ("sqxtun2 %0.4s, %1.2d"
10090 : "+w"(result)
10091 : "w"(b)
10092 : /* No clobbers */);
10093 return result;
10096 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10097 vqrdmulh_n_s16 (int16x4_t a, int16_t b)
10099 int16x4_t result;
10100 __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]"
10101 : "=w"(result)
10102 : "w"(a), "x"(b)
10103 : /* No clobbers */);
10104 return result;
10107 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10108 vqrdmulh_n_s32 (int32x2_t a, int32_t b)
10110 int32x2_t result;
10111 __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]"
10112 : "=w"(result)
10113 : "w"(a), "w"(b)
10114 : /* No clobbers */);
10115 return result;
10118 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10119 vqrdmulhq_n_s16 (int16x8_t a, int16_t b)
10121 int16x8_t result;
10122 __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]"
10123 : "=w"(result)
10124 : "w"(a), "x"(b)
10125 : /* No clobbers */);
10126 return result;
10129 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10130 vqrdmulhq_n_s32 (int32x4_t a, int32_t b)
10132 int32x4_t result;
10133 __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]"
10134 : "=w"(result)
10135 : "w"(a), "w"(b)
10136 : /* No clobbers */);
10137 return result;
/* vqrshrn_high_n / vqrshrun_high_n: saturating rounding shift-right-
   narrow into the high half (SQRSHRN2/UQRSHRN2/SQRSHRUN2).  These are
   macros because the shift amount C must be an assemble-time immediate
   ("i" constraint).  A supplies the low half; the narrowed B lands in
   the high half via the read-write "+w" destination.  */

#define vqrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* Signed input, unsigned saturated output (SQRSHRUN2).  */
#define vqrshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqrshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vqshrn_high_n / vqshrun_high_n: saturating (non-rounding) shift-
   right-narrow into the high half (SQSHRN2/UQSHRN2/SQSHRUN2).  Macros
   because the shift amount C must be an immediate ("i").  A supplies
   the low half of the 128-bit result.  */

#define vqshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

/* Signed input, unsigned saturated output (SQSHRUN2).  */
#define vqshrun_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrun_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vqshrun_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
10410 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10411 vrecpe_u32 (uint32x2_t a)
10413 uint32x2_t result;
10414 __asm__ ("urecpe %0.2s,%1.2s"
10415 : "=w"(result)
10416 : "w"(a)
10417 : /* No clobbers */);
10418 return result;
10421 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10422 vrecpeq_u32 (uint32x4_t a)
10424 uint32x4_t result;
10425 __asm__ ("urecpe %0.4s,%1.4s"
10426 : "=w"(result)
10427 : "w"(a)
10428 : /* No clobbers */);
10429 return result;
/* vrshrn_high_n: rounding shift-right-narrow into the high half
   (RSHRN2).  Macros because the shift amount C must be an immediate
   ("i").  A supplies the low half of the 128-bit result.  */

#define vrshrn_high_n_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int8x8_t a_ = (a); \
       int8x16_t result = vcombine_s8 \
                            (a_, vcreate_s8 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int16x4_t a_ = (a); \
       int16x8_t result = vcombine_s16 \
                            (a_, vcreate_s16 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int32x2_t a_ = (a); \
       int32x4_t result = vcombine_s32 \
                            (a_, vcreate_s32 \
                                   (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint8x8_t a_ = (a); \
       uint8x16_t result = vcombine_u8 \
                             (a_, vcreate_u8 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint16x4_t a_ = (a); \
       uint16x8_t result = vcombine_u16 \
                             (a_, vcreate_u16 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_high_n_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint32x2_t a_ = (a); \
       uint32x4_t result = vcombine_u32 \
                             (a_, vcreate_u32 \
                                    (__AARCH64_UINT64_C (0x0))); \
       __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \
                : "+w"(result) \
                : "w"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vrshrn_n: rounding shift-right-narrow (RSHRN), 64-bit destination.
   Macros because the shift amount B must be an immediate ("i").  */

#define vrshrn_n_s16(a, b) \
  __extension__ \
    ({ \
       int16x8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_s32(a, b) \
  __extension__ \
    ({ \
       int32x4_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_s64(a, b) \
  __extension__ \
    ({ \
       int64x2_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u16(a, b) \
  __extension__ \
    ({ \
       uint16x8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("rshrn %0.8b,%1.8h,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u32(a, b) \
  __extension__ \
    ({ \
       uint32x4_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("rshrn %0.4h,%1.4s,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })

#define vrshrn_n_u64(a, b) \
  __extension__ \
    ({ \
       uint64x2_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("rshrn %0.2s,%1.2d,%2" \
                : "=w"(result) \
                : "w"(a_), "i"(b) \
                : /* No clobbers */); \
       result; \
     })
10594 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10595 vrsqrte_f32 (float32x2_t a)
10597 float32x2_t result;
10598 __asm__ ("frsqrte %0.2s,%1.2s"
10599 : "=w"(result)
10600 : "w"(a)
10601 : /* No clobbers */);
10602 return result;
10605 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
10606 vrsqrte_f64 (float64x1_t a)
10608 float64x1_t result;
10609 __asm__ ("frsqrte %d0,%d1"
10610 : "=w"(result)
10611 : "w"(a)
10612 : /* No clobbers */);
10613 return result;
10616 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10617 vrsqrte_u32 (uint32x2_t a)
10619 uint32x2_t result;
10620 __asm__ ("ursqrte %0.2s,%1.2s"
10621 : "=w"(result)
10622 : "w"(a)
10623 : /* No clobbers */);
10624 return result;
10627 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10628 vrsqrted_f64 (float64_t a)
10630 float64_t result;
10631 __asm__ ("frsqrte %d0,%d1"
10632 : "=w"(result)
10633 : "w"(a)
10634 : /* No clobbers */);
10635 return result;
10638 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10639 vrsqrteq_f32 (float32x4_t a)
10641 float32x4_t result;
10642 __asm__ ("frsqrte %0.4s,%1.4s"
10643 : "=w"(result)
10644 : "w"(a)
10645 : /* No clobbers */);
10646 return result;
10649 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10650 vrsqrteq_f64 (float64x2_t a)
10652 float64x2_t result;
10653 __asm__ ("frsqrte %0.2d,%1.2d"
10654 : "=w"(result)
10655 : "w"(a)
10656 : /* No clobbers */);
10657 return result;
10660 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10661 vrsqrteq_u32 (uint32x4_t a)
10663 uint32x4_t result;
10664 __asm__ ("ursqrte %0.4s,%1.4s"
10665 : "=w"(result)
10666 : "w"(a)
10667 : /* No clobbers */);
10668 return result;
10671 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10672 vrsqrtes_f32 (float32_t a)
10674 float32_t result;
10675 __asm__ ("frsqrte %s0,%s1"
10676 : "=w"(result)
10677 : "w"(a)
10678 : /* No clobbers */);
10679 return result;
10682 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
10683 vrsqrts_f32 (float32x2_t a, float32x2_t b)
10685 float32x2_t result;
10686 __asm__ ("frsqrts %0.2s,%1.2s,%2.2s"
10687 : "=w"(result)
10688 : "w"(a), "w"(b)
10689 : /* No clobbers */);
10690 return result;
10693 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
10694 vrsqrtsd_f64 (float64_t a, float64_t b)
10696 float64_t result;
10697 __asm__ ("frsqrts %d0,%d1,%d2"
10698 : "=w"(result)
10699 : "w"(a), "w"(b)
10700 : /* No clobbers */);
10701 return result;
10704 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
10705 vrsqrtsq_f32 (float32x4_t a, float32x4_t b)
10707 float32x4_t result;
10708 __asm__ ("frsqrts %0.4s,%1.4s,%2.4s"
10709 : "=w"(result)
10710 : "w"(a), "w"(b)
10711 : /* No clobbers */);
10712 return result;
10715 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
10716 vrsqrtsq_f64 (float64x2_t a, float64x2_t b)
10718 float64x2_t result;
10719 __asm__ ("frsqrts %0.2d,%1.2d,%2.2d"
10720 : "=w"(result)
10721 : "w"(a), "w"(b)
10722 : /* No clobbers */);
10723 return result;
10726 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
10727 vrsqrtss_f32 (float32_t a, float32_t b)
10729 float32_t result;
10730 __asm__ ("frsqrts %s0,%s1,%s2"
10731 : "=w"(result)
10732 : "w"(a), "w"(b)
10733 : /* No clobbers */);
10734 return result;
10737 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
10738 vrsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
10740 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
10741 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
10742 : "+w"(result)
10743 : "w"(b), "w"(c)
10744 : /* No clobbers */);
10745 return result;
10748 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
10749 vrsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
10751 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
10752 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
10753 : "+w"(result)
10754 : "w"(b), "w"(c)
10755 : /* No clobbers */);
10756 return result;
10759 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
10760 vrsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
10762 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
10763 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
10764 : "+w"(result)
10765 : "w"(b), "w"(c)
10766 : /* No clobbers */);
10767 return result;
10770 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
10771 vrsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
10773 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
10774 __asm__ ("rsubhn2 %0.16b, %1.8h, %2.8h"
10775 : "+w"(result)
10776 : "w"(b), "w"(c)
10777 : /* No clobbers */);
10778 return result;
10781 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
10782 vrsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
10784 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
10785 __asm__ ("rsubhn2 %0.8h, %1.4s, %2.4s"
10786 : "+w"(result)
10787 : "w"(b), "w"(c)
10788 : /* No clobbers */);
10789 return result;
10792 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
10793 vrsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
10795 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
10796 __asm__ ("rsubhn2 %0.4s, %1.2d, %2.2d"
10797 : "+w"(result)
10798 : "w"(b), "w"(c)
10799 : /* No clobbers */);
10800 return result;
10803 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
10804 vrsubhn_s16 (int16x8_t a, int16x8_t b)
10806 int8x8_t result;
10807 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
10808 : "=w"(result)
10809 : "w"(a), "w"(b)
10810 : /* No clobbers */);
10811 return result;
10814 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
10815 vrsubhn_s32 (int32x4_t a, int32x4_t b)
10817 int16x4_t result;
10818 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
10819 : "=w"(result)
10820 : "w"(a), "w"(b)
10821 : /* No clobbers */);
10822 return result;
10825 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
10826 vrsubhn_s64 (int64x2_t a, int64x2_t b)
10828 int32x2_t result;
10829 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
10830 : "=w"(result)
10831 : "w"(a), "w"(b)
10832 : /* No clobbers */);
10833 return result;
10836 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
10837 vrsubhn_u16 (uint16x8_t a, uint16x8_t b)
10839 uint8x8_t result;
10840 __asm__ ("rsubhn %0.8b, %1.8h, %2.8h"
10841 : "=w"(result)
10842 : "w"(a), "w"(b)
10843 : /* No clobbers */);
10844 return result;
10847 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
10848 vrsubhn_u32 (uint32x4_t a, uint32x4_t b)
10850 uint16x4_t result;
10851 __asm__ ("rsubhn %0.4h, %1.4s, %2.4s"
10852 : "=w"(result)
10853 : "w"(a), "w"(b)
10854 : /* No clobbers */);
10855 return result;
10858 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
10859 vrsubhn_u64 (uint64x2_t a, uint64x2_t b)
10861 uint32x2_t result;
10862 __asm__ ("rsubhn %0.2s, %1.2d, %2.2d"
10863 : "=w"(result)
10864 : "w"(a), "w"(b)
10865 : /* No clobbers */);
10866 return result;
/* vset_lane family (64-bit vectors): insert scalar A into lane C of
   vector B (INS).  Macros because the lane index C must be an
   immediate ("i").  The "0"(b_) input ties the source vector to the
   output register so the untouched lanes are preserved; "%w1"/"%x1"
   selects the 32- vs 64-bit view of the GP source register.  */

#define vset_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x2_t b_ = (b); \
       float32_t a_ = (a); \
       float32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x1_t b_ = (b); \
       float64_t a_ = (a); \
       float64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x8_t b_ = (b); \
       poly8_t a_ = (a); \
       poly8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x4_t b_ = (b); \
       poly16_t a_ = (a); \
       poly16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x8_t b_ = (b); \
       int8_t a_ = (a); \
       int8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x4_t b_ = (b); \
       int16_t a_ = (a); \
       int16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x2_t b_ = (b); \
       int32_t a_ = (a); \
       int32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x1_t b_ = (b); \
       int64_t a_ = (a); \
       int64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x8_t b_ = (b); \
       uint8_t a_ = (a); \
       uint8x8_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x4_t b_ = (b); \
       uint16_t a_ = (a); \
       uint16x4_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x2_t b_ = (b); \
       uint32_t a_ = (a); \
       uint32x2_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vset_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x1_t b_ = (b); \
       uint64_t a_ = (a); \
       uint64x1_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vsetq_lane family (128-bit vectors): insert scalar A into lane C of
   vector B (INS).  Same shape as vset_lane; the lane index C must be
   an immediate ("i"), and "0"(b_) ties B to the output so the other
   lanes pass through unchanged.  */

#define vsetq_lane_f32(a, b, c) \
  __extension__ \
    ({ \
       float32x4_t b_ = (b); \
       float32_t a_ = (a); \
       float32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_f64(a, b, c) \
  __extension__ \
    ({ \
       float64x2_t b_ = (b); \
       float64_t a_ = (a); \
       float64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_p8(a, b, c) \
  __extension__ \
    ({ \
       poly8x16_t b_ = (b); \
       poly8_t a_ = (a); \
       poly8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_p16(a, b, c) \
  __extension__ \
    ({ \
       poly16x8_t b_ = (b); \
       poly16_t a_ = (a); \
       poly16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s8(a, b, c) \
  __extension__ \
    ({ \
       int8x16_t b_ = (b); \
       int8_t a_ = (a); \
       int8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s16(a, b, c) \
  __extension__ \
    ({ \
       int16x8_t b_ = (b); \
       int16_t a_ = (a); \
       int16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s32(a, b, c) \
  __extension__ \
    ({ \
       int32x4_t b_ = (b); \
       int32_t a_ = (a); \
       int32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_s64(a, b, c) \
  __extension__ \
    ({ \
       int64x2_t b_ = (b); \
       int64_t a_ = (a); \
       int64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u8(a, b, c) \
  __extension__ \
    ({ \
       uint8x16_t b_ = (b); \
       uint8_t a_ = (a); \
       uint8x16_t result; \
       __asm__ ("ins %0.b[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u16(a, b, c) \
  __extension__ \
    ({ \
       uint16x8_t b_ = (b); \
       uint16_t a_ = (a); \
       uint16x8_t result; \
       __asm__ ("ins %0.h[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u32(a, b, c) \
  __extension__ \
    ({ \
       uint32x4_t b_ = (b); \
       uint32_t a_ = (a); \
       uint32x4_t result; \
       __asm__ ("ins %0.s[%3], %w1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })

#define vsetq_lane_u64(a, b, c) \
  __extension__ \
    ({ \
       uint64x2_t b_ = (b); \
       uint64_t a_ = (a); \
       uint64x2_t result; \
       __asm__ ("ins %0.d[%3], %x1" \
                : "=w"(result) \
                : "r"(a_), "0"(b_), "i"(c) \
                : /* No clobbers */); \
       result; \
     })
/* vshrn_high_n_<type> (a, b, c): shift each element of B right by the
   constant C, narrow to half width, and place the result in the upper
   half of the returned vector; the lower half is A.  SHRN2 writes only
   the upper half of the destination, so RESULT is pre-seeded with A
   (zero-extended into a Q register via vcombine) and tied with "+w".  */

#define vshrn_high_n_s16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int8x8_t a_ = (a);                                               \
       int8x16_t result = vcombine_s8                                   \
                            (a_, vcreate_s8                             \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int16x4_t a_ = (a);                                              \
       int16x8_t result = vcombine_s16                                  \
                            (a_, vcreate_s16                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_s64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int32x2_t a_ = (a);                                              \
       int32x4_t result = vcombine_s32                                  \
                            (a_, vcreate_s32                            \
                                   (__AARCH64_UINT64_C (0x0)));         \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u16(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint8x8_t a_ = (a);                                              \
       uint8x16_t result = vcombine_u8                                  \
                             (a_, vcreate_u8                            \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.16b,%1.8h,#%2"                                \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u32(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint16x4_t a_ = (a);                                             \
       uint16x8_t result = vcombine_u16                                 \
                             (a_, vcreate_u16                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.8h,%1.4s,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_high_n_u64(a, b, c)                                       \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint32x2_t a_ = (a);                                             \
       uint32x4_t result = vcombine_u32                                 \
                             (a_, vcreate_u32                           \
                                    (__AARCH64_UINT64_C (0x0)));        \
       __asm__ ("shrn2 %0.4s,%1.2d,#%2"                                 \
                : "+w"(result)                                          \
                : "w"(b_), "i"(c)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vshrn_n_<type> (a, b): shift each element of A right by the constant
   B and narrow each element to half its width (SHRN).  B must be a
   compile-time constant ("i" constraint).  */

#define vshrn_n_s16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t a_ = (a);                                              \
       int8x8_t result;                                                 \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t a_ = (a);                                              \
       int16x4_t result;                                                \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_s64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t a_ = (a);                                              \
       int32x2_t result;                                                \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u16(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t a_ = (a);                                             \
       uint8x8_t result;                                                \
       __asm__ ("shrn %0.8b,%1.8h,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u32(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t a_ = (a);                                             \
       uint16x4_t result;                                               \
       __asm__ ("shrn %0.4h,%1.4s,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vshrn_n_u64(a, b)                                               \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t a_ = (a);                                             \
       uint32x2_t result;                                               \
       __asm__ ("shrn %0.2s,%1.2d,%2"                                   \
                : "=w"(result)                                          \
                : "w"(a_), "i"(b)                                       \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsli(q)_n_p<sz> (a, b, c): shift each element of B left by the
   constant C and insert into A (SLI), keeping the low C bits of each
   element of A.  The "0" constraint ties A to the output register.  */

#define vsli_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sli %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsli_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sli %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sli %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsliq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sli %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vsri(q)_n_p<sz> (a, b, c): shift each element of B right by the
   constant C and insert into A (SRI), keeping the high C bits of each
   element of A.  The "0" constraint ties A to the output register.  */

#define vsri_n_p8(a, b, c)                                              \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8x8_t a_ = (a);                                              \
       poly8x8_t result;                                                \
       __asm__ ("sri %0.8b,%2.8b,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsri_n_p16(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16x4_t a_ = (a);                                             \
       poly16x4_t result;                                               \
       __asm__ ("sri %0.4h,%2.4h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p8(a, b, c)                                             \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8x16_t a_ = (a);                                             \
       poly8x16_t result;                                               \
       __asm__ ("sri %0.16b,%2.16b,%3"                                  \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })

#define vsriq_n_p16(a, b, c)                                            \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16x8_t a_ = (a);                                             \
       poly16x8_t result;                                               \
       __asm__ ("sri %0.8h,%2.8h,%3"                                    \
                : "=w"(result)                                          \
                : "0"(a_), "w"(b_), "i"(c)                              \
                : /* No clobbers */);                                   \
       result;                                                          \
     })
/* vst1_lane_<type> (a, b, c): store lane C of 64-bit vector B to the
   address A (ST1 single-structure form).  C must be a compile-time
   constant; "memory" clobber orders the store against other accesses.  */

#define vst1_lane_f32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float32x2_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_f64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       float64x1_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       poly8x8_t b_ = (b);                                              \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_p16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly16x4_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       int8x8_t b_ = (b);                                               \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int16x4_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int32x2_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_s64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int64x1_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u8(a, b, c)                                           \
  __extension__                                                         \
    ({                                                                  \
       uint8x8_t b_ = (b);                                              \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u16(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint16x4_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u32(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint32x2_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1_lane_u64(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint64x1_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
/* vst1q_lane_<type> (a, b, c): store lane C of 128-bit vector B to the
   address A (ST1 single-structure form).  C must be a compile-time
   constant; "memory" clobber orders the store against other accesses.  */

#define vst1q_lane_f32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float32x4_t b_ = (b);                                            \
       float32_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_f64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       float64x2_t b_ = (b);                                            \
       float64_t * a_ = (a);                                            \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       poly8x16_t b_ = (b);                                             \
       poly8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_p16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       poly16x8_t b_ = (b);                                             \
       poly16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       int8x16_t b_ = (b);                                              \
       int8_t * a_ = (a);                                               \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int16x8_t b_ = (b);                                              \
       int16_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int32x4_t b_ = (b);                                              \
       int32_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_s64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       int64x2_t b_ = (b);                                              \
       int64_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u8(a, b, c)                                          \
  __extension__                                                         \
    ({                                                                  \
       uint8x16_t b_ = (b);                                             \
       uint8_t * a_ = (a);                                              \
       __asm__ ("st1 {%1.b}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u16(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint16x8_t b_ = (b);                                             \
       uint16_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.h}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u32(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint32x4_t b_ = (b);                                             \
       uint32_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.s}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })

#define vst1q_lane_u64(a, b, c)                                         \
  __extension__                                                         \
    ({                                                                  \
       uint64x2_t b_ = (b);                                             \
       uint64_t * a_ = (a);                                             \
       __asm__ ("st1 {%1.d}[%2],[%0]"                                   \
                :                                                       \
                : "r"(a_), "w"(b_), "i"(c)                              \
                : "memory");                                            \
     })
11712 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
11713 vsubhn_high_s16 (int8x8_t a, int16x8_t b, int16x8_t c)
11715 int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
11716 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
11717 : "+w"(result)
11718 : "w"(b), "w"(c)
11719 : /* No clobbers */);
11720 return result;
11723 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
11724 vsubhn_high_s32 (int16x4_t a, int32x4_t b, int32x4_t c)
11726 int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0)));
11727 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
11728 : "+w"(result)
11729 : "w"(b), "w"(c)
11730 : /* No clobbers */);
11731 return result;
11734 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
11735 vsubhn_high_s64 (int32x2_t a, int64x2_t b, int64x2_t c)
11737 int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0)));
11738 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
11739 : "+w"(result)
11740 : "w"(b), "w"(c)
11741 : /* No clobbers */);
11742 return result;
11745 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11746 vsubhn_high_u16 (uint8x8_t a, uint16x8_t b, uint16x8_t c)
11748 uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
11749 __asm__ ("subhn2 %0.16b, %1.8h, %2.8h"
11750 : "+w"(result)
11751 : "w"(b), "w"(c)
11752 : /* No clobbers */);
11753 return result;
11756 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11757 vsubhn_high_u32 (uint16x4_t a, uint32x4_t b, uint32x4_t c)
11759 uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0)));
11760 __asm__ ("subhn2 %0.8h, %1.4s, %2.4s"
11761 : "+w"(result)
11762 : "w"(b), "w"(c)
11763 : /* No clobbers */);
11764 return result;
11767 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
11768 vsubhn_high_u64 (uint32x2_t a, uint64x2_t b, uint64x2_t c)
11770 uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0)));
11771 __asm__ ("subhn2 %0.4s, %1.2d, %2.2d"
11772 : "+w"(result)
11773 : "w"(b), "w"(c)
11774 : /* No clobbers */);
11775 return result;
11778 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
11779 vsubhn_s16 (int16x8_t a, int16x8_t b)
11781 int8x8_t result;
11782 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
11783 : "=w"(result)
11784 : "w"(a), "w"(b)
11785 : /* No clobbers */);
11786 return result;
11789 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
11790 vsubhn_s32 (int32x4_t a, int32x4_t b)
11792 int16x4_t result;
11793 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
11794 : "=w"(result)
11795 : "w"(a), "w"(b)
11796 : /* No clobbers */);
11797 return result;
11800 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
11801 vsubhn_s64 (int64x2_t a, int64x2_t b)
11803 int32x2_t result;
11804 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
11805 : "=w"(result)
11806 : "w"(a), "w"(b)
11807 : /* No clobbers */);
11808 return result;
11811 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11812 vsubhn_u16 (uint16x8_t a, uint16x8_t b)
11814 uint8x8_t result;
11815 __asm__ ("subhn %0.8b, %1.8h, %2.8h"
11816 : "=w"(result)
11817 : "w"(a), "w"(b)
11818 : /* No clobbers */);
11819 return result;
11822 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11823 vsubhn_u32 (uint32x4_t a, uint32x4_t b)
11825 uint16x4_t result;
11826 __asm__ ("subhn %0.4h, %1.4s, %2.4s"
11827 : "=w"(result)
11828 : "w"(a), "w"(b)
11829 : /* No clobbers */);
11830 return result;
11833 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
11834 vsubhn_u64 (uint64x2_t a, uint64x2_t b)
11836 uint32x2_t result;
11837 __asm__ ("subhn %0.2s, %1.2d, %2.2d"
11838 : "=w"(result)
11839 : "w"(a), "w"(b)
11840 : /* No clobbers */);
11841 return result;
11844 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
11845 vtst_p8 (poly8x8_t a, poly8x8_t b)
11847 uint8x8_t result;
11848 __asm__ ("cmtst %0.8b, %1.8b, %2.8b"
11849 : "=w"(result)
11850 : "w"(a), "w"(b)
11851 : /* No clobbers */);
11852 return result;
11855 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
11856 vtst_p16 (poly16x4_t a, poly16x4_t b)
11858 uint16x4_t result;
11859 __asm__ ("cmtst %0.4h, %1.4h, %2.4h"
11860 : "=w"(result)
11861 : "w"(a), "w"(b)
11862 : /* No clobbers */);
11863 return result;
11866 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
11867 vtstq_p8 (poly8x16_t a, poly8x16_t b)
11869 uint8x16_t result;
11870 __asm__ ("cmtst %0.16b, %1.16b, %2.16b"
11871 : "=w"(result)
11872 : "w"(a), "w"(b)
11873 : /* No clobbers */);
11874 return result;
11877 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
11878 vtstq_p16 (poly16x8_t a, poly16x8_t b)
11880 uint16x8_t result;
11881 __asm__ ("cmtst %0.8h, %1.8h, %2.8h"
11882 : "=w"(result)
11883 : "w"(a), "w"(b)
11884 : /* No clobbers */);
11885 return result;
/* End of temporary inline asm implementations.  */

/* Start of temporary inline asm for vldn, vstn and friends.  */

/* Create struct element types for duplicating loads.

   Create 2 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | N  | N  |
   +------+----+----+----+----+
   |float | -  | -  | N  | N  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 3 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | Y  | Y  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | Y  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | Y  | -  | -  |
   +------+----+----+----+----+

   Create 4 element structures of:

   +------+----+----+----+----+
   |      | 8  | 16 | 32 | 64 |
   +------+----+----+----+----+
   |int   | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |uint  | Y  | N  | N  | Y  |
   +------+----+----+----+----+
   |float | -  | -  | N  | Y  |
   +------+----+----+----+----+
   |poly  | Y  | N  | -  | -  |
   +------+----+----+----+----+

   This is required for casting memory reference.  */
11937 #define __STRUCTN(t, sz, nelem) \
11938 typedef struct t ## sz ## x ## nelem ## _t { \
11939 t ## sz ## _t val[nelem]; \
11940 } t ## sz ## x ## nelem ## _t;
11942 /* 2-element structs. */
11943 __STRUCTN (int, 8, 2)
11944 __STRUCTN (int, 16, 2)
11945 __STRUCTN (uint, 8, 2)
11946 __STRUCTN (uint, 16, 2)
11947 __STRUCTN (poly, 8, 2)
11948 __STRUCTN (poly, 16, 2)
11949 /* 3-element structs. */
11950 __STRUCTN (int, 8, 3)
11951 __STRUCTN (int, 16, 3)
11952 __STRUCTN (int, 32, 3)
11953 __STRUCTN (int, 64, 3)
11954 __STRUCTN (uint, 8, 3)
11955 __STRUCTN (uint, 16, 3)
11956 __STRUCTN (uint, 32, 3)
11957 __STRUCTN (uint, 64, 3)
11958 __STRUCTN (float, 32, 3)
11959 __STRUCTN (float, 64, 3)
11960 __STRUCTN (poly, 8, 3)
11961 __STRUCTN (poly, 16, 3)
11962 /* 4-element structs. */
11963 __STRUCTN (int, 8, 4)
11964 __STRUCTN (int, 64, 4)
11965 __STRUCTN (uint, 8, 4)
11966 __STRUCTN (uint, 64, 4)
11967 __STRUCTN (poly, 8, 4)
11968 __STRUCTN (float, 64, 4)
11969 #undef __STRUCTN
11971 #define __LD2R_FUNC(rettype, structtype, ptrtype, \
11972 regsuffix, funcsuffix, Q) \
11973 __extension__ static __inline rettype \
11974 __attribute__ ((__always_inline__)) \
11975 vld2 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
11977 rettype result; \
11978 __asm__ ("ld2r {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
11979 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
11980 : "=Q"(result) \
11981 : "Q"(*(const structtype *)ptr) \
11982 : "memory", "v16", "v17"); \
11983 return result; \
11986 __LD2R_FUNC (float32x2x2_t, float32x2_t, float32_t, 2s, f32,)
11987 __LD2R_FUNC (float64x1x2_t, float64x2_t, float64_t, 1d, f64,)
11988 __LD2R_FUNC (poly8x8x2_t, poly8x2_t, poly8_t, 8b, p8,)
11989 __LD2R_FUNC (poly16x4x2_t, poly16x2_t, poly16_t, 4h, p16,)
11990 __LD2R_FUNC (int8x8x2_t, int8x2_t, int8_t, 8b, s8,)
11991 __LD2R_FUNC (int16x4x2_t, int16x2_t, int16_t, 4h, s16,)
11992 __LD2R_FUNC (int32x2x2_t, int32x2_t, int32_t, 2s, s32,)
11993 __LD2R_FUNC (int64x1x2_t, int64x2_t, int64_t, 1d, s64,)
11994 __LD2R_FUNC (uint8x8x2_t, uint8x2_t, uint8_t, 8b, u8,)
11995 __LD2R_FUNC (uint16x4x2_t, uint16x2_t, uint16_t, 4h, u16,)
11996 __LD2R_FUNC (uint32x2x2_t, uint32x2_t, uint32_t, 2s, u32,)
11997 __LD2R_FUNC (uint64x1x2_t, uint64x2_t, uint64_t, 1d, u64,)
11998 __LD2R_FUNC (float32x4x2_t, float32x2_t, float32_t, 4s, f32, q)
11999 __LD2R_FUNC (float64x2x2_t, float64x2_t, float64_t, 2d, f64, q)
12000 __LD2R_FUNC (poly8x16x2_t, poly8x2_t, poly8_t, 16b, p8, q)
12001 __LD2R_FUNC (poly16x8x2_t, poly16x2_t, poly16_t, 8h, p16, q)
12002 __LD2R_FUNC (int8x16x2_t, int8x2_t, int8_t, 16b, s8, q)
12003 __LD2R_FUNC (int16x8x2_t, int16x2_t, int16_t, 8h, s16, q)
12004 __LD2R_FUNC (int32x4x2_t, int32x2_t, int32_t, 4s, s32, q)
12005 __LD2R_FUNC (int64x2x2_t, int64x2_t, int64_t, 2d, s64, q)
12006 __LD2R_FUNC (uint8x16x2_t, uint8x2_t, uint8_t, 16b, u8, q)
12007 __LD2R_FUNC (uint16x8x2_t, uint16x2_t, uint16_t, 8h, u16, q)
12008 __LD2R_FUNC (uint32x4x2_t, uint32x2_t, uint32_t, 4s, u32, q)
12009 __LD2R_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, 2d, u64, q)
12011 #define __LD2_LANE_FUNC(rettype, ptrtype, regsuffix, \
12012 lnsuffix, funcsuffix, Q) \
12013 __extension__ static __inline rettype \
12014 __attribute__ ((__always_inline__)) \
12015 vld2 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
12016 rettype b, const int c) \
12018 rettype result; \
12019 __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \
12020 "ld2 {v16." #lnsuffix ", v17." #lnsuffix "}[%3], %2\n\t" \
12021 "st1 {v16." #regsuffix ", v17." #regsuffix "}, %0\n\t" \
12022 : "=Q"(result) \
12023 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
12024 : "memory", "v16", "v17"); \
12025 return result; \
12028 __LD2_LANE_FUNC (int8x8x2_t, uint8_t, 8b, b, s8,)
12029 __LD2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,)
12030 __LD2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,)
12031 __LD2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,)
12032 __LD2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,)
12033 __LD2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,)
12034 __LD2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,)
12035 __LD2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,)
12036 __LD2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,)
12037 __LD2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,)
12038 __LD2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,)
12039 __LD2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,)
12040 __LD2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q)
12041 __LD2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q)
12042 __LD2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q)
12043 __LD2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q)
12044 __LD2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q)
12045 __LD2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q)
12046 __LD2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q)
12047 __LD2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q)
12048 __LD2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q)
12049 __LD2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q)
12050 __LD2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q)
12051 __LD2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q)
12053 #define __LD3R_FUNC(rettype, structtype, ptrtype, \
12054 regsuffix, funcsuffix, Q) \
12055 __extension__ static __inline rettype \
12056 __attribute__ ((__always_inline__)) \
12057 vld3 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
12059 rettype result; \
12060 __asm__ ("ld3r {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
12061 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
12062 : "=Q"(result) \
12063 : "Q"(*(const structtype *)ptr) \
12064 : "memory", "v16", "v17", "v18"); \
12065 return result; \
12068 __LD3R_FUNC (float32x2x3_t, float32x3_t, float32_t, 2s, f32,)
12069 __LD3R_FUNC (float64x1x3_t, float64x3_t, float64_t, 1d, f64,)
12070 __LD3R_FUNC (poly8x8x3_t, poly8x3_t, poly8_t, 8b, p8,)
12071 __LD3R_FUNC (poly16x4x3_t, poly16x3_t, poly16_t, 4h, p16,)
12072 __LD3R_FUNC (int8x8x3_t, int8x3_t, int8_t, 8b, s8,)
12073 __LD3R_FUNC (int16x4x3_t, int16x3_t, int16_t, 4h, s16,)
12074 __LD3R_FUNC (int32x2x3_t, int32x3_t, int32_t, 2s, s32,)
12075 __LD3R_FUNC (int64x1x3_t, int64x3_t, int64_t, 1d, s64,)
12076 __LD3R_FUNC (uint8x8x3_t, uint8x3_t, uint8_t, 8b, u8,)
12077 __LD3R_FUNC (uint16x4x3_t, uint16x3_t, uint16_t, 4h, u16,)
12078 __LD3R_FUNC (uint32x2x3_t, uint32x3_t, uint32_t, 2s, u32,)
12079 __LD3R_FUNC (uint64x1x3_t, uint64x3_t, uint64_t, 1d, u64,)
12080 __LD3R_FUNC (float32x4x3_t, float32x3_t, float32_t, 4s, f32, q)
12081 __LD3R_FUNC (float64x2x3_t, float64x3_t, float64_t, 2d, f64, q)
12082 __LD3R_FUNC (poly8x16x3_t, poly8x3_t, poly8_t, 16b, p8, q)
12083 __LD3R_FUNC (poly16x8x3_t, poly16x3_t, poly16_t, 8h, p16, q)
12084 __LD3R_FUNC (int8x16x3_t, int8x3_t, int8_t, 16b, s8, q)
12085 __LD3R_FUNC (int16x8x3_t, int16x3_t, int16_t, 8h, s16, q)
12086 __LD3R_FUNC (int32x4x3_t, int32x3_t, int32_t, 4s, s32, q)
12087 __LD3R_FUNC (int64x2x3_t, int64x3_t, int64_t, 2d, s64, q)
12088 __LD3R_FUNC (uint8x16x3_t, uint8x3_t, uint8_t, 16b, u8, q)
12089 __LD3R_FUNC (uint16x8x3_t, uint16x3_t, uint16_t, 8h, u16, q)
12090 __LD3R_FUNC (uint32x4x3_t, uint32x3_t, uint32_t, 4s, u32, q)
12091 __LD3R_FUNC (uint64x2x3_t, uint64x3_t, uint64_t, 2d, u64, q)
12093 #define __LD3_LANE_FUNC(rettype, ptrtype, regsuffix, \
12094 lnsuffix, funcsuffix, Q) \
12095 __extension__ static __inline rettype \
12096 __attribute__ ((__always_inline__)) \
12097 vld3 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
12098 rettype b, const int c) \
12100 rettype result; \
12101 __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \
12102 "ld3 {v16." #lnsuffix " - v18." #lnsuffix "}[%3], %2\n\t" \
12103 "st1 {v16." #regsuffix " - v18." #regsuffix "}, %0\n\t" \
12104 : "=Q"(result) \
12105 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
12106 : "memory", "v16", "v17", "v18"); \
12107 return result; \
12110 __LD3_LANE_FUNC (int8x8x3_t, uint8_t, 8b, b, s8,)
12111 __LD3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,)
12112 __LD3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,)
12113 __LD3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,)
12114 __LD3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,)
12115 __LD3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,)
12116 __LD3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,)
12117 __LD3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,)
12118 __LD3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,)
12119 __LD3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,)
12120 __LD3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,)
12121 __LD3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,)
12122 __LD3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q)
12123 __LD3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q)
12124 __LD3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q)
12125 __LD3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q)
12126 __LD3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q)
12127 __LD3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q)
12128 __LD3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q)
12129 __LD3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q)
12130 __LD3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q)
12131 __LD3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q)
12132 __LD3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q)
12133 __LD3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q)
12135 #define __LD4R_FUNC(rettype, structtype, ptrtype, \
12136 regsuffix, funcsuffix, Q) \
12137 __extension__ static __inline rettype \
12138 __attribute__ ((__always_inline__)) \
12139 vld4 ## Q ## _dup_ ## funcsuffix (const ptrtype *ptr) \
12141 rettype result; \
12142 __asm__ ("ld4r {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
12143 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
12144 : "=Q"(result) \
12145 : "Q"(*(const structtype *)ptr) \
12146 : "memory", "v16", "v17", "v18", "v19"); \
12147 return result; \
12150 __LD4R_FUNC (float32x2x4_t, float32x4_t, float32_t, 2s, f32,)
12151 __LD4R_FUNC (float64x1x4_t, float64x4_t, float64_t, 1d, f64,)
12152 __LD4R_FUNC (poly8x8x4_t, poly8x4_t, poly8_t, 8b, p8,)
12153 __LD4R_FUNC (poly16x4x4_t, poly16x4_t, poly16_t, 4h, p16,)
12154 __LD4R_FUNC (int8x8x4_t, int8x4_t, int8_t, 8b, s8,)
12155 __LD4R_FUNC (int16x4x4_t, int16x4_t, int16_t, 4h, s16,)
12156 __LD4R_FUNC (int32x2x4_t, int32x4_t, int32_t, 2s, s32,)
12157 __LD4R_FUNC (int64x1x4_t, int64x4_t, int64_t, 1d, s64,)
12158 __LD4R_FUNC (uint8x8x4_t, uint8x4_t, uint8_t, 8b, u8,)
12159 __LD4R_FUNC (uint16x4x4_t, uint16x4_t, uint16_t, 4h, u16,)
12160 __LD4R_FUNC (uint32x2x4_t, uint32x4_t, uint32_t, 2s, u32,)
12161 __LD4R_FUNC (uint64x1x4_t, uint64x4_t, uint64_t, 1d, u64,)
12162 __LD4R_FUNC (float32x4x4_t, float32x4_t, float32_t, 4s, f32, q)
12163 __LD4R_FUNC (float64x2x4_t, float64x4_t, float64_t, 2d, f64, q)
12164 __LD4R_FUNC (poly8x16x4_t, poly8x4_t, poly8_t, 16b, p8, q)
12165 __LD4R_FUNC (poly16x8x4_t, poly16x4_t, poly16_t, 8h, p16, q)
12166 __LD4R_FUNC (int8x16x4_t, int8x4_t, int8_t, 16b, s8, q)
12167 __LD4R_FUNC (int16x8x4_t, int16x4_t, int16_t, 8h, s16, q)
12168 __LD4R_FUNC (int32x4x4_t, int32x4_t, int32_t, 4s, s32, q)
12169 __LD4R_FUNC (int64x2x4_t, int64x4_t, int64_t, 2d, s64, q)
12170 __LD4R_FUNC (uint8x16x4_t, uint8x4_t, uint8_t, 16b, u8, q)
12171 __LD4R_FUNC (uint16x8x4_t, uint16x4_t, uint16_t, 8h, u16, q)
12172 __LD4R_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, 4s, u32, q)
12173 __LD4R_FUNC (uint64x2x4_t, uint64x4_t, uint64_t, 2d, u64, q)
12175 #define __LD4_LANE_FUNC(rettype, ptrtype, regsuffix, \
12176 lnsuffix, funcsuffix, Q) \
12177 __extension__ static __inline rettype \
12178 __attribute__ ((__always_inline__)) \
12179 vld4 ## Q ## _lane_ ## funcsuffix (const ptrtype *ptr, \
12180 rettype b, const int c) \
12182 rettype result; \
12183 __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \
12184 "ld4 {v16." #lnsuffix " - v19." #lnsuffix "}[%3], %2\n\t" \
12185 "st1 {v16." #regsuffix " - v19." #regsuffix "}, %0\n\t" \
12186 : "=Q"(result) \
12187 : "Q"(b), "Q"(*(const rettype *)ptr), "i"(c) \
12188 : "memory", "v16", "v17", "v18", "v19"); \
12189 return result; \
/* Expansions of __LD4_LANE_FUNC: vld4(q?)_lane_<type> intrinsics.
   NOTE(review): the D-register s8 variant takes a uint8_t * pointer
   while the Q-register s8 variant takes int8_t *.  Inside the macro the
   pointer is only dereferenced via a cast to the return type, so the
   generated code is the same, but the published prototype differs —
   confirm which signature is intended.  */
__LD4_LANE_FUNC (int8x8x4_t, uint8_t, 8b, b, s8,)
__LD4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,)
__LD4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,)
__LD4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,)
__LD4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,)
__LD4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,)
__LD4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,)
__LD4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,)
__LD4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,)
__LD4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,)
__LD4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,)
__LD4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,)
__LD4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q)
__LD4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q)
__LD4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q)
__LD4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q)
__LD4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q)
__LD4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q)
__LD4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q)
__LD4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q)
__LD4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q)
__LD4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q)
__LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q)
__LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q)
/* Define vst2_lane_<funcsuffix> (D-register form): store lane __c of
   the two D-register vectors in __b to __ptr.  Each D-register half is
   widened to a Q register via vcombine with a zero upper half so the
   Q-register set_qregoi/st2_lane builtins can be used; SIGNEDTYPE is
   the Q-register vector type those builtins expect.
   (Brace lines restored; they were lost in extraction.)  */
#define __ST2_LANE_FUNC(intype, largetype, ptrtype,			     \
			mode, ptr_mode, funcsuffix, signedtype)		     \
__extension__ static __inline void					     \
__attribute__ ((__always_inline__))					     \
vst2_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_oi __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregoi##mode (__o,			     \
					    (signedtype) __temp.val[0], 0);  \
  __o = __builtin_aarch64_set_qregoi##mode (__o,			     \
					    (signedtype) __temp.val[1], 1);  \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				    __ptr, __o, __c);			     \
}
/* Expansions of the D-register __ST2_LANE_FUNC: vst2_lane_<type>.  */
__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32,
		 float32x4_t)
__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64,
		 float64x2_t)
__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t)
__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16,
		 int16x8_t)
__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t)
__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t)
__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t)
__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t)
__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t)
__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16,
		 int16x8_t)
__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32,
		 int32x4_t)
__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64,
		 int64x2_t)
/* Redefine __ST2_LANE_FUNC for the Q-register form, vst2q_lane_<type>:
   the two vectors in __b already fill full Q registers, so the struct
   is reinterpreted as the builtin OI type through a union and passed to
   the st2_lane builtin directly.
   (Brace lines restored; they were lost in extraction.)  */
#undef __ST2_LANE_FUNC
#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ static __inline void					    \
__attribute__ ((__always_inline__))					    \
vst2q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_oi __o; } __temp = { __b };		    \
  __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}
/* Expansions of the Q-register __ST2_LANE_FUNC: vst2q_lane_<type>.  */
__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32)
__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64)
__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8)
__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16)
__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8)
__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16)
__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32)
__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64)
__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8)
__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16)
__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32)
__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64)
/* Define vst3_lane_<funcsuffix> (D-register form): store lane __c of
   the three D-register vectors in __b to __ptr.  Each half is widened
   to a Q register (zero upper half) and packed into the builtin CI
   type via set_qregci before calling the st3_lane builtin.
   (Brace lines restored; they were lost in extraction.)  */
#define __ST3_LANE_FUNC(intype, largetype, ptrtype,			     \
			mode, ptr_mode, funcsuffix, signedtype)		     \
__extension__ static __inline void					     \
__attribute__ ((__always_inline__))					     \
vst3_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_ci __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]								     \
    = vcombine_##funcsuffix (__b.val[2],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregci##mode (__o,			     \
					    (signedtype) __temp.val[0], 0);  \
  __o = __builtin_aarch64_set_qregci##mode (__o,			     \
					    (signedtype) __temp.val[1], 1);  \
  __o = __builtin_aarch64_set_qregci##mode (__o,			     \
					    (signedtype) __temp.val[2], 2);  \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				    __ptr, __o, __c);			     \
}
/* Expansions of the D-register __ST3_LANE_FUNC: vst3_lane_<type>.  */
__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32,
		 float32x4_t)
__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64,
		 float64x2_t)
__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t)
__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16,
		 int16x8_t)
__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t)
__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t)
__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t)
__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t)
__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t)
__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16,
		 int16x8_t)
__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32,
		 int32x4_t)
__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64,
		 int64x2_t)
/* Redefine __ST3_LANE_FUNC for the Q-register form, vst3q_lane_<type>:
   reinterpret the three-vector struct as the builtin CI type through a
   union and call the builtin directly.
   (Brace lines restored; they were lost in extraction.)  */
#undef __ST3_LANE_FUNC
#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ static __inline void					    \
__attribute__ ((__always_inline__))					    \
vst3q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_ci __o; } __temp = { __b };		    \
  __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}
/* Expansions of the Q-register __ST3_LANE_FUNC: vst3q_lane_<type>.  */
__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32)
__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64)
__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8)
__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16)
__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8)
__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16)
__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32)
__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64)
__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8)
__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16)
__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32)
__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64)
/* Define vst4_lane_<funcsuffix> (D-register form): store lane __c of
   the four D-register vectors in __b to __ptr.  Each half is widened
   to a Q register (zero upper half) and packed into the builtin XI
   type via set_qregxi before calling the st4_lane builtin.
   (Brace lines restored; they were lost in extraction.)  */
#define __ST4_LANE_FUNC(intype, largetype, ptrtype,			     \
			mode, ptr_mode, funcsuffix, signedtype)		     \
__extension__ static __inline void					     \
__attribute__ ((__always_inline__))					     \
vst4_lane_ ## funcsuffix (ptrtype *__ptr,				     \
			  intype __b, const int __c)			     \
{									     \
  __builtin_aarch64_simd_xi __o;					     \
  largetype __temp;							     \
  __temp.val[0]								     \
    = vcombine_##funcsuffix (__b.val[0],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[1]								     \
    = vcombine_##funcsuffix (__b.val[1],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[2]								     \
    = vcombine_##funcsuffix (__b.val[2],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __temp.val[3]								     \
    = vcombine_##funcsuffix (__b.val[3],				     \
			     vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
					    (signedtype) __temp.val[0], 0);  \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
					    (signedtype) __temp.val[1], 1);  \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
					    (signedtype) __temp.val[2], 2);  \
  __o = __builtin_aarch64_set_qregxi##mode (__o,			     \
					    (signedtype) __temp.val[3], 3);  \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *)  \
				    __ptr, __o, __c);			     \
}
/* Expansions of the D-register __ST4_LANE_FUNC: vst4_lane_<type>.  */
__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32,
		 float32x4_t)
__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64,
		 float64x2_t)
__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t)
__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16,
		 int16x8_t)
__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t)
__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t)
__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t)
__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t)
__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t)
__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16,
		 int16x8_t)
__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32,
		 int32x4_t)
__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64,
		 int64x2_t)
/* Redefine __ST4_LANE_FUNC for the Q-register form, vst4q_lane_<type>:
   reinterpret the four-vector struct as the builtin XI type through a
   union and call the builtin directly.
   (Brace lines restored; they were lost in extraction.)  */
#undef __ST4_LANE_FUNC
#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix)	    \
__extension__ static __inline void					    \
__attribute__ ((__always_inline__))					    \
vst4q_lane_ ## funcsuffix (ptrtype *__ptr,				    \
			   intype __b, const int __c)			    \
{									    \
  union { intype __i;							    \
	  __builtin_aarch64_simd_xi __o; } __temp = { __b };		    \
  __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \
				    __ptr, __temp.__o, __c);		    \
}
/* Expansions of the Q-register __ST4_LANE_FUNC: vst4q_lane_<type>.  */
__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32)
__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64)
__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8)
__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16)
__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8)
__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16)
__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32)
__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64)
__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8)
__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16)
__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32)
__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64)
12436 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
12437 vaddlv_s32 (int32x2_t a)
12439 int64_t result;
12440 __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
12441 return result;
12444 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
12445 vaddlv_u32 (uint32x2_t a)
12447 uint64_t result;
12448 __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : );
12449 return result;
12452 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12453 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12455 return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
12458 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12459 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12461 return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
12464 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12465 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12467 return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
12470 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12471 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12473 return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
12476 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
12477 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
12479 return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
12482 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
12483 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
12485 return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
12488 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
12489 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
12491 return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
12494 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
12495 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
12497 return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
12500 /* Table intrinsics. */
12502 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12503 vqtbl1_p8 (poly8x16_t a, uint8x8_t b)
12505 poly8x8_t result;
12506 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12507 : "=w"(result)
12508 : "w"(a), "w"(b)
12509 : /* No clobbers */);
12510 return result;
12513 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12514 vqtbl1_s8 (int8x16_t a, uint8x8_t b)
12516 int8x8_t result;
12517 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12518 : "=w"(result)
12519 : "w"(a), "w"(b)
12520 : /* No clobbers */);
12521 return result;
12524 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12525 vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
12527 uint8x8_t result;
12528 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
12529 : "=w"(result)
12530 : "w"(a), "w"(b)
12531 : /* No clobbers */);
12532 return result;
12535 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12536 vqtbl1q_p8 (poly8x16_t a, uint8x16_t b)
12538 poly8x16_t result;
12539 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12540 : "=w"(result)
12541 : "w"(a), "w"(b)
12542 : /* No clobbers */);
12543 return result;
12546 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12547 vqtbl1q_s8 (int8x16_t a, uint8x16_t b)
12549 int8x16_t result;
12550 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12551 : "=w"(result)
12552 : "w"(a), "w"(b)
12553 : /* No clobbers */);
12554 return result;
12557 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12558 vqtbl1q_u8 (uint8x16_t a, uint8x16_t b)
12560 uint8x16_t result;
12561 __asm__ ("tbl %0.16b, {%1.16b}, %2.16b"
12562 : "=w"(result)
12563 : "w"(a), "w"(b)
12564 : /* No clobbers */);
12565 return result;
12568 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12569 vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx)
12571 int8x8_t result;
12572 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12573 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12574 :"=w"(result)
12575 :"Q"(tab),"w"(idx)
12576 :"memory", "v16", "v17");
12577 return result;
12580 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12581 vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx)
12583 uint8x8_t result;
12584 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12585 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12586 :"=w"(result)
12587 :"Q"(tab),"w"(idx)
12588 :"memory", "v16", "v17");
12589 return result;
12592 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12593 vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx)
12595 poly8x8_t result;
12596 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12597 "tbl %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12598 :"=w"(result)
12599 :"Q"(tab),"w"(idx)
12600 :"memory", "v16", "v17");
12601 return result;
12604 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12605 vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx)
12607 int8x16_t result;
12608 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12609 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12610 :"=w"(result)
12611 :"Q"(tab),"w"(idx)
12612 :"memory", "v16", "v17");
12613 return result;
12616 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12617 vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx)
12619 uint8x16_t result;
12620 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12621 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12622 :"=w"(result)
12623 :"Q"(tab),"w"(idx)
12624 :"memory", "v16", "v17");
12625 return result;
12628 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12629 vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx)
12631 poly8x16_t result;
12632 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12633 "tbl %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12634 :"=w"(result)
12635 :"Q"(tab),"w"(idx)
12636 :"memory", "v16", "v17");
12637 return result;
12640 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12641 vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx)
12643 int8x8_t result;
12644 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12645 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12646 :"=w"(result)
12647 :"Q"(tab),"w"(idx)
12648 :"memory", "v16", "v17", "v18");
12649 return result;
12652 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12653 vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx)
12655 uint8x8_t result;
12656 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12657 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12658 :"=w"(result)
12659 :"Q"(tab),"w"(idx)
12660 :"memory", "v16", "v17", "v18");
12661 return result;
12664 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12665 vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx)
12667 poly8x8_t result;
12668 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12669 "tbl %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12670 :"=w"(result)
12671 :"Q"(tab),"w"(idx)
12672 :"memory", "v16", "v17", "v18");
12673 return result;
12676 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12677 vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx)
12679 int8x16_t result;
12680 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12681 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12682 :"=w"(result)
12683 :"Q"(tab),"w"(idx)
12684 :"memory", "v16", "v17", "v18");
12685 return result;
12688 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12689 vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx)
12691 uint8x16_t result;
12692 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12693 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12694 :"=w"(result)
12695 :"Q"(tab),"w"(idx)
12696 :"memory", "v16", "v17", "v18");
12697 return result;
12700 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12701 vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx)
12703 poly8x16_t result;
12704 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12705 "tbl %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12706 :"=w"(result)
12707 :"Q"(tab),"w"(idx)
12708 :"memory", "v16", "v17", "v18");
12709 return result;
12712 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12713 vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx)
12715 int8x8_t result;
12716 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12717 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12718 :"=w"(result)
12719 :"Q"(tab),"w"(idx)
12720 :"memory", "v16", "v17", "v18", "v19");
12721 return result;
12724 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12725 vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx)
12727 uint8x8_t result;
12728 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12729 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12730 :"=w"(result)
12731 :"Q"(tab),"w"(idx)
12732 :"memory", "v16", "v17", "v18", "v19");
12733 return result;
12736 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12737 vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx)
12739 poly8x8_t result;
12740 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12741 "tbl %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
12742 :"=w"(result)
12743 :"Q"(tab),"w"(idx)
12744 :"memory", "v16", "v17", "v18", "v19");
12745 return result;
12749 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12750 vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx)
12752 int8x16_t result;
12753 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12754 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12755 :"=w"(result)
12756 :"Q"(tab),"w"(idx)
12757 :"memory", "v16", "v17", "v18", "v19");
12758 return result;
12761 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12762 vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx)
12764 uint8x16_t result;
12765 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12766 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12767 :"=w"(result)
12768 :"Q"(tab),"w"(idx)
12769 :"memory", "v16", "v17", "v18", "v19");
12770 return result;
12773 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12774 vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx)
12776 poly8x16_t result;
12777 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
12778 "tbl %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
12779 :"=w"(result)
12780 :"Q"(tab),"w"(idx)
12781 :"memory", "v16", "v17", "v18", "v19");
12782 return result;
12786 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12787 vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx)
12789 int8x8_t result = r;
12790 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12791 : "+w"(result)
12792 : "w"(tab), "w"(idx)
12793 : /* No clobbers */);
12794 return result;
12797 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12798 vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx)
12800 uint8x8_t result = r;
12801 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12802 : "+w"(result)
12803 : "w"(tab), "w"(idx)
12804 : /* No clobbers */);
12805 return result;
12808 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12809 vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx)
12811 poly8x8_t result = r;
12812 __asm__ ("tbx %0.8b,{%1.16b},%2.8b"
12813 : "+w"(result)
12814 : "w"(tab), "w"(idx)
12815 : /* No clobbers */);
12816 return result;
12819 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12820 vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx)
12822 int8x16_t result = r;
12823 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12824 : "+w"(result)
12825 : "w"(tab), "w"(idx)
12826 : /* No clobbers */);
12827 return result;
12830 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12831 vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx)
12833 uint8x16_t result = r;
12834 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12835 : "+w"(result)
12836 : "w"(tab), "w"(idx)
12837 : /* No clobbers */);
12838 return result;
12841 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12842 vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx)
12844 poly8x16_t result = r;
12845 __asm__ ("tbx %0.16b,{%1.16b},%2.16b"
12846 : "+w"(result)
12847 : "w"(tab), "w"(idx)
12848 : /* No clobbers */);
12849 return result;
12852 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12853 vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx)
12855 int8x8_t result = r;
12856 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12857 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12858 :"+w"(result)
12859 :"Q"(tab),"w"(idx)
12860 :"memory", "v16", "v17");
12861 return result;
12864 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12865 vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx)
12867 uint8x8_t result = r;
12868 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12869 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12870 :"+w"(result)
12871 :"Q"(tab),"w"(idx)
12872 :"memory", "v16", "v17");
12873 return result;
12876 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12877 vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx)
12879 poly8x8_t result = r;
12880 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12881 "tbx %0.8b, {v16.16b, v17.16b}, %2.8b\n\t"
12882 :"+w"(result)
12883 :"Q"(tab),"w"(idx)
12884 :"memory", "v16", "v17");
12885 return result;
12889 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12890 vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx)
12892 int8x16_t result = r;
12893 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12894 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12895 :"+w"(result)
12896 :"Q"(tab),"w"(idx)
12897 :"memory", "v16", "v17");
12898 return result;
12901 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12902 vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
12904 uint8x16_t result = r;
12905 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12906 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12907 :"+w"(result)
12908 :"Q"(tab),"w"(idx)
12909 :"memory", "v16", "v17");
12910 return result;
12913 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12914 vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx)
12916 poly8x16_t result = r;
12917 __asm__ ("ld1 {v16.16b, v17.16b}, %1\n\t"
12918 "tbx %0.16b, {v16.16b, v17.16b}, %2.16b\n\t"
12919 :"+w"(result)
12920 :"Q"(tab),"w"(idx)
12921 :"memory", "v16", "v17");
12922 return result;
12926 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
12927 vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx)
12929 int8x8_t result = r;
12930 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12931 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12932 :"+w"(result)
12933 :"Q"(tab),"w"(idx)
12934 :"memory", "v16", "v17", "v18");
12935 return result;
12938 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
12939 vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx)
12941 uint8x8_t result = r;
12942 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12943 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12944 :"+w"(result)
12945 :"Q"(tab),"w"(idx)
12946 :"memory", "v16", "v17", "v18");
12947 return result;
12950 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
12951 vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx)
12953 poly8x8_t result = r;
12954 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12955 "tbx %0.8b, {v16.16b - v18.16b}, %2.8b\n\t"
12956 :"+w"(result)
12957 :"Q"(tab),"w"(idx)
12958 :"memory", "v16", "v17", "v18");
12959 return result;
12963 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
12964 vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx)
12966 int8x16_t result = r;
12967 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12968 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12969 :"+w"(result)
12970 :"Q"(tab),"w"(idx)
12971 :"memory", "v16", "v17", "v18");
12972 return result;
12975 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
12976 vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx)
12978 uint8x16_t result = r;
12979 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12980 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12981 :"+w"(result)
12982 :"Q"(tab),"w"(idx)
12983 :"memory", "v16", "v17", "v18");
12984 return result;
12987 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
12988 vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx)
12990 poly8x16_t result = r;
12991 __asm__ ("ld1 {v16.16b - v18.16b}, %1\n\t"
12992 "tbx %0.16b, {v16.16b - v18.16b}, %2.16b\n\t"
12993 :"+w"(result)
12994 :"Q"(tab),"w"(idx)
12995 :"memory", "v16", "v17", "v18");
12996 return result;
13000 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13001 vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx)
13003 int8x8_t result = r;
13004 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13005 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13006 :"+w"(result)
13007 :"Q"(tab),"w"(idx)
13008 :"memory", "v16", "v17", "v18", "v19");
13009 return result;
13012 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13013 vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx)
13015 uint8x8_t result = r;
13016 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13017 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13018 :"+w"(result)
13019 :"Q"(tab),"w"(idx)
13020 :"memory", "v16", "v17", "v18", "v19");
13021 return result;
13024 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13025 vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx)
13027 poly8x8_t result = r;
13028 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13029 "tbx %0.8b, {v16.16b - v19.16b}, %2.8b\n\t"
13030 :"+w"(result)
13031 :"Q"(tab),"w"(idx)
13032 :"memory", "v16", "v17", "v18", "v19");
13033 return result;
13037 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13038 vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx)
13040 int8x16_t result = r;
13041 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13042 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13043 :"+w"(result)
13044 :"Q"(tab),"w"(idx)
13045 :"memory", "v16", "v17", "v18", "v19");
13046 return result;
13049 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13050 vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx)
13052 uint8x16_t result = r;
13053 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13054 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13055 :"+w"(result)
13056 :"Q"(tab),"w"(idx)
13057 :"memory", "v16", "v17", "v18", "v19");
13058 return result;
13061 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13062 vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx)
13064 poly8x16_t result = r;
13065 __asm__ ("ld1 {v16.16b - v19.16b}, %1\n\t"
13066 "tbx %0.16b, {v16.16b - v19.16b}, %2.16b\n\t"
13067 :"+w"(result)
13068 :"Q"(tab),"w"(idx)
13069 :"memory", "v16", "v17", "v18", "v19");
13070 return result;
13073 /* V7 legacy table intrinsics. */
13075 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13076 vtbl1_s8 (int8x8_t tab, int8x8_t idx)
13078 int8x8_t result;
13079 int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0)));
13080 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13081 : "=w"(result)
13082 : "w"(temp), "w"(idx)
13083 : /* No clobbers */);
13084 return result;
13087 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13088 vtbl1_u8 (uint8x8_t tab, uint8x8_t idx)
13090 uint8x8_t result;
13091 uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0)));
13092 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13093 : "=w"(result)
13094 : "w"(temp), "w"(idx)
13095 : /* No clobbers */);
13096 return result;
13099 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13100 vtbl1_p8 (poly8x8_t tab, uint8x8_t idx)
13102 poly8x8_t result;
13103 poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0)));
13104 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13105 : "=w"(result)
13106 : "w"(temp), "w"(idx)
13107 : /* No clobbers */);
13108 return result;
/* vtbl2_{s8,u8,p8}: V7-style two-table (16-byte) lookup.
   The two 8-byte V7 tables exactly fill one 128-bit TBL register, so a
   single-register TBL gives the same index range (0-15) and the same
   out-of-range-yields-zero behaviour as V7 VTBL2.  */
13111 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13112 vtbl2_s8 (int8x8x2_t tab, int8x8_t idx)
13114 int8x8_t result;
13115 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
13116 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13117 : "=w"(result)
13118 : "w"(temp), "w"(idx)
13119 : /* No clobbers */);
13120 return result;

/* As vtbl2_s8, for unsigned elements.  */
13123 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13124 vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx)
13126 uint8x8_t result;
13127 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
13128 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13129 : "=w"(result)
13130 : "w"(temp), "w"(idx)
13131 : /* No clobbers */);
13132 return result;

/* As vtbl2_s8, for polynomial elements.  */
13135 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13136 vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx)
13138 poly8x8_t result;
13139 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
13140 __asm__ ("tbl %0.8b, {%1.16b}, %2.8b"
13141 : "=w"(result)
13142 : "w"(temp), "w"(idx)
13143 : /* No clobbers */);
13144 return result;
/* vtbl3_{s8,u8,p8}: V7-style three-table (24-byte) lookup.
   The 24 table bytes are packed into a pair of 128-bit vectors (the
   third 8-byte table is zero-padded), stored to memory, and reloaded
   with LD1 into the hard-coded consecutive pair v16-v17 because the
   two-register TBL form requires adjacent registers, which GCC asm
   constraints cannot express.  Hence the "Q" memory input and the
   v16/v17/"memory" clobbers.  */
13147 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13148 vtbl3_s8 (int8x8x3_t tab, int8x8_t idx)
13150 int8x8_t result;
13151 int8x16x2_t temp;
13152 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13153 temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0)));
13154 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13155 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13156 : "=w"(result)
13157 : "Q"(temp), "w"(idx)
13158 : "v16", "v17", "memory");
13159 return result;

/* As vtbl3_s8, for unsigned elements.  */
13162 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13163 vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx)
13165 uint8x8_t result;
13166 uint8x16x2_t temp;
13167 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13168 temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0)));
13169 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13170 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13171 : "=w"(result)
13172 : "Q"(temp), "w"(idx)
13173 : "v16", "v17", "memory");
13174 return result;

/* As vtbl3_s8, for polynomial elements.  */
13177 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13178 vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx)
13180 poly8x8_t result;
13181 poly8x16x2_t temp;
13182 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13183 temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0)));
13184 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13185 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13186 : "=w"(result)
13187 : "Q"(temp), "w"(idx)
13188 : "v16", "v17", "memory");
13189 return result;
/* vtbl4_{s8,u8,p8}: V7-style four-table (32-byte) lookup.
   The four 8-byte tables exactly fill a pair of 128-bit vectors (no
   padding needed); the pair is spilled to memory and reloaded into the
   fixed consecutive registers v16-v17 for the two-register TBL, as in
   vtbl3 above.  */
13192 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13193 vtbl4_s8 (int8x8x4_t tab, int8x8_t idx)
13195 int8x8_t result;
13196 int8x16x2_t temp;
13197 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13198 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
13199 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13200 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13201 : "=w"(result)
13202 : "Q"(temp), "w"(idx)
13203 : "v16", "v17", "memory");
13204 return result;

/* As vtbl4_s8, for unsigned elements.  */
13207 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13208 vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx)
13210 uint8x8_t result;
13211 uint8x16x2_t temp;
13212 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13213 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
13214 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13215 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13216 : "=w"(result)
13217 : "Q"(temp), "w"(idx)
13218 : "v16", "v17", "memory");
13219 return result;

/* As vtbl4_s8, for polynomial elements.  */
13222 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13223 vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx)
13225 poly8x8_t result;
13226 poly8x16x2_t temp;
13227 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13228 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
13229 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13230 "tbl %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13231 : "=w"(result)
13232 : "Q"(temp), "w"(idx)
13233 : "v16", "v17", "memory");
13234 return result;
/* vtbx2_{s8,u8,p8}: V7-style two-table extension lookup.
   TBX differs from TBL in that out-of-range indices leave the
   corresponding destination lane unchanged (hence result is seeded
   with R and tied with the "+w" read-write constraint).  The two
   8-byte tables fill a 16-byte register exactly, so the TBX index
   range (0-15) matches V7 VTBX2 with no extra fix-up.  */
13237 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13238 vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx)
13240 int8x8_t result = r;
13241 int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]);
13242 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13243 : "+w"(result)
13244 : "w"(temp), "w"(idx)
13245 : /* No clobbers */);
13246 return result;

/* As vtbx2_s8, for unsigned elements.  */
13249 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13250 vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx)
13252 uint8x8_t result = r;
13253 uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]);
13254 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13255 : "+w"(result)
13256 : "w"(temp), "w"(idx)
13257 : /* No clobbers */);
13258 return result;

/* As vtbx2_s8, for polynomial elements.  */
13261 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13262 vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx)
13264 poly8x8_t result = r;
13265 poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]);
13266 __asm__ ("tbx %0.8b, {%1.16b}, %2.8b"
13267 : "+w"(result)
13268 : "w"(temp), "w"(idx)
13269 : /* No clobbers */);
13270 return result;
/* vtbx4_{s8,u8,p8}: V7-style four-table extension lookup.
   32 table bytes fill the v16-v17 pair exactly, so the two-register
   TBX index range (0-31) matches V7 VTBX4 and out-of-range lanes keep
   the value of R (seeded into result via "+w").  The spill/LD1 into
   fixed v16-v17 is required because TBX needs a consecutive register
   pair — see vtbl3 above.  */
13273 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13274 vtbx4_s8 (int8x8_t r, int8x8x4_t tab, int8x8_t idx)
13276 int8x8_t result = r;
13277 int8x16x2_t temp;
13278 temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]);
13279 temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]);
13280 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13281 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13282 : "+w"(result)
13283 : "Q"(temp), "w"(idx)
13284 : "v16", "v17", "memory");
13285 return result;

/* As vtbx4_s8, for unsigned elements.  */
13288 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13289 vtbx4_u8 (uint8x8_t r, uint8x8x4_t tab, uint8x8_t idx)
13291 uint8x8_t result = r;
13292 uint8x16x2_t temp;
13293 temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]);
13294 temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]);
13295 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13296 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13297 : "+w"(result)
13298 : "Q"(temp), "w"(idx)
13299 : "v16", "v17", "memory");
13300 return result;

/* As vtbx4_s8, for polynomial elements.  */
13303 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13304 vtbx4_p8 (poly8x8_t r, poly8x8x4_t tab, uint8x8_t idx)
13306 poly8x8_t result = r;
13307 poly8x16x2_t temp;
13308 temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]);
13309 temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]);
13310 __asm__ ("ld1 {v16.16b - v17.16b }, %1\n\t"
13311 "tbx %0.8b, {v16.16b - v17.16b}, %2.8b\n\t"
13312 : "+w"(result)
13313 : "Q"(temp), "w"(idx)
13314 : "v16", "v17", "memory");
13315 return result;
13318 /* End of temporary inline asm. */

13320 /* Start of optimal implementations in approved order. */

13322 /* vabs */

/* Lane-wise absolute value.  Each variant simply wraps the matching
   GCC builtin; the single-lane f64/s64 forms operate on lane 0 with
   the scalar builtin and rebuild a one-element vector.  */
13324 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13325 vabs_f32 (float32x2_t __a)
13327 return __builtin_aarch64_absv2sf (__a);

13330 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13331 vabs_f64 (float64x1_t __a)
13333 return (float64x1_t) {__builtin_fabs (__a[0])};

13336 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13337 vabs_s8 (int8x8_t __a)
13339 return __builtin_aarch64_absv8qi (__a);

13342 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13343 vabs_s16 (int16x4_t __a)
13345 return __builtin_aarch64_absv4hi (__a);

13348 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13349 vabs_s32 (int32x2_t __a)
13351 return __builtin_aarch64_absv2si (__a);

13354 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13355 vabs_s64 (int64x1_t __a)
13357 return (int64x1_t) {__builtin_aarch64_absdi (__a[0])};

/* 128-bit (quad) variants.  */
13360 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13361 vabsq_f32 (float32x4_t __a)
13363 return __builtin_aarch64_absv4sf (__a);

13366 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13367 vabsq_f64 (float64x2_t __a)
13369 return __builtin_aarch64_absv2df (__a);

13372 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13373 vabsq_s8 (int8x16_t __a)
13375 return __builtin_aarch64_absv16qi (__a);

13378 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13379 vabsq_s16 (int16x8_t __a)
13381 return __builtin_aarch64_absv8hi (__a);

13384 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13385 vabsq_s32 (int32x4_t __a)
13387 return __builtin_aarch64_absv4si (__a);

13390 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13391 vabsq_s64 (int64x2_t __a)
13393 return __builtin_aarch64_absv2di (__a);
13396 /* vadd */

/* Scalar 64-bit adds; plain C '+' maps directly to the D-register
   ADD instruction.  Unsigned overflow wraps as usual.  */
13398 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13399 vaddd_s64 (int64_t __a, int64_t __b)
13401 return __a + __b;

13404 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13405 vaddd_u64 (uint64_t __a, uint64_t __b)
13407 return __a + __b;
13410 /* vaddv */

/* Across-vector sum reductions: the reduc_*plus builtins return a
   vector whose lane 0 holds the sum, extracted with v(get|getq)_lane.
   The unsigned variants cast through the signed vector types because
   the builtins are declared on signed modes; the casts are bit-exact
   reinterpretations.  */
13412 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13413 vaddv_s8 (int8x8_t __a)
13415 return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);

13418 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13419 vaddv_s16 (int16x4_t __a)
13421 return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);

13424 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13425 vaddv_s32 (int32x2_t __a)
13427 return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);

13430 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13431 vaddv_u8 (uint8x8_t __a)
13433 return vget_lane_u8 ((uint8x8_t)
13434 __builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),

13438 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13439 vaddv_u16 (uint16x4_t __a)
13441 return vget_lane_u16 ((uint16x4_t)
13442 __builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),

13446 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13447 vaddv_u32 (uint32x2_t __a)
13449 return vget_lane_u32 ((uint32x2_t)
13450 __builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),

/* 128-bit (quad) variants.  */
13454 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
13455 vaddvq_s8 (int8x16_t __a)
13457 return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),

13461 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
13462 vaddvq_s16 (int16x8_t __a)
13464 return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);

13467 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
13468 vaddvq_s32 (int32x4_t __a)
13470 return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);

13473 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
13474 vaddvq_s64 (int64x2_t __a)
13476 return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);

13479 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
13480 vaddvq_u8 (uint8x16_t __a)
13482 return vgetq_lane_u8 ((uint8x16_t)
13483 __builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),

13487 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
13488 vaddvq_u16 (uint16x8_t __a)
13490 return vgetq_lane_u16 ((uint16x8_t)
13491 __builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),

13495 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13496 vaddvq_u32 (uint32x4_t __a)
13498 return vgetq_lane_u32 ((uint32x4_t)
13499 __builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),

13503 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13504 vaddvq_u64 (uint64x2_t __a)
13506 return vgetq_lane_u64 ((uint64x2_t)
13507 __builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),

/* Floating-point across-vector sums.  */
13511 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13512 vaddv_f32 (float32x2_t __a)
13514 float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
13515 return vget_lane_f32 (__t, 0);

13518 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
13519 vaddvq_f32 (float32x4_t __a)
13521 float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
13522 return vgetq_lane_f32 (__t, 0);

13525 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
13526 vaddvq_f64 (float64x2_t __a)
13528 float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
13529 return vgetq_lane_f64 (__t, 0);
13532 /* vbsl */

/* Bitwise select: for each bit set in mask __a the result takes the
   bit from __b, otherwise from __c.  The builtin suffix encodes the
   signedness signature (s/u/p per operand).  The one-lane 64-bit
   forms apply the scalar bsl builtin to lane 0 and rebuild the
   vector.  */
13534 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
13535 vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c)
13537 return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c);

13540 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
13541 vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c)
13543 return (float64x1_t)
13544 { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) };

13547 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
13548 vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c)
13550 return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c);

13553 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
13554 vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c)
13556 return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c);

13559 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
13560 vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c)
13562 return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c);

13565 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
13566 vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c)
13568 return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c);

13571 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
13572 vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c)
13574 return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c);

13577 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
13578 vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c)
13580 return (int64x1_t)
13581 {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])};

13584 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13585 vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
13587 return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c);

13590 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13591 vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
13593 return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c);

13596 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13597 vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
13599 return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c);

13602 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13603 vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c)
13605 return (uint64x1_t)
13606 {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])};

/* 128-bit (quad) variants.  */
13609 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
13610 vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c)
13612 return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c);

13615 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
13616 vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c)
13618 return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c);

13621 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
13622 vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c)
13624 return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c);

13627 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
13628 vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c)
13630 return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c);

13633 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
13634 vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c)
13636 return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c);

13639 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
13640 vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c)
13642 return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c);

13645 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
13646 vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c)
13648 return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c);

13651 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
13652 vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c)
13654 return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c);

13657 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13658 vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
13660 return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c);

13663 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13664 vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
13666 return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c);

13669 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13670 vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
13672 return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c);

13675 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13676 vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
13678 return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
13681 #ifdef __ARM_FEATURE_CRYPTO

13683 /* vaes */

/* AES crypto-extension intrinsics, only available when the target
   advertises __ARM_FEATURE_CRYPTO: single AES round (encrypt/decrypt)
   and (inverse) MixColumns steps.
   NOTE(review): these lack the __attribute__ ((__always_inline__))
   that every neighbouring intrinsic carries — presumably an
   oversight; confirm and align.  */
13685 static __inline uint8x16_t
13686 vaeseq_u8 (uint8x16_t data, uint8x16_t key)
13688 return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);

13691 static __inline uint8x16_t
13692 vaesdq_u8 (uint8x16_t data, uint8x16_t key)
13694 return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);

13697 static __inline uint8x16_t
13698 vaesmcq_u8 (uint8x16_t data)
13700 return __builtin_aarch64_crypto_aesmcv16qi_uu (data);

13703 static __inline uint8x16_t
13704 vaesimcq_u8 (uint8x16_t data)
13706 return __builtin_aarch64_crypto_aesimcv16qi_uu (data);

13709 #endif
13711 /* vcage */

/* Absolute compare greater-than-or-equal: |a| >= |b|, yielding an
   all-ones lane mask on true, zero on false.  Vector forms compose
   vabs with the C '>=' vector comparison; scalar forms return
   -1 (all-ones) or 0 explicitly.  */
13713 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13714 vcage_f64 (float64x1_t __a, float64x1_t __b)
13716 return vabs_f64 (__a) >= vabs_f64 (__b);

13719 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13720 vcages_f32 (float32_t __a, float32_t __b)
13722 return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;

13725 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13726 vcage_f32 (float32x2_t __a, float32x2_t __b)
13728 return vabs_f32 (__a) >= vabs_f32 (__b);

13731 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13732 vcageq_f32 (float32x4_t __a, float32x4_t __b)
13734 return vabsq_f32 (__a) >= vabsq_f32 (__b);

13737 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13738 vcaged_f64 (float64_t __a, float64_t __b)
13740 return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;

13743 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13744 vcageq_f64 (float64x2_t __a, float64x2_t __b)
13746 return vabsq_f64 (__a) >= vabsq_f64 (__b);
13749 /* vcagt */

/* Absolute compare strictly-greater: |a| > |b|, all-ones/zero mask
   per lane; same construction as vcage above.  */
13751 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13752 vcagts_f32 (float32_t __a, float32_t __b)
13754 return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;

13757 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13758 vcagt_f32 (float32x2_t __a, float32x2_t __b)
13760 return vabs_f32 (__a) > vabs_f32 (__b);

13763 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13764 vcagt_f64 (float64x1_t __a, float64x1_t __b)
13766 return vabs_f64 (__a) > vabs_f64 (__b);

13769 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13770 vcagtq_f32 (float32x4_t __a, float32x4_t __b)
13772 return vabsq_f32 (__a) > vabsq_f32 (__b);

13775 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13776 vcagtd_f64 (float64_t __a, float64_t __b)
13778 return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;

13781 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13782 vcagtq_f64 (float64x2_t __a, float64x2_t __b)
13784 return vabsq_f64 (__a) > vabsq_f64 (__b);
13787 /* vcale */

/* Absolute compare less-than-or-equal: |a| <= |b|, all-ones/zero mask
   per lane; mirror image of vcage.  */
13789 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13790 vcale_f32 (float32x2_t __a, float32x2_t __b)
13792 return vabs_f32 (__a) <= vabs_f32 (__b);

13795 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13796 vcale_f64 (float64x1_t __a, float64x1_t __b)
13798 return vabs_f64 (__a) <= vabs_f64 (__b);

13801 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13802 vcaled_f64 (float64_t __a, float64_t __b)
13804 return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;

13807 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13808 vcales_f32 (float32_t __a, float32_t __b)
13810 return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;

13813 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13814 vcaleq_f32 (float32x4_t __a, float32x4_t __b)
13816 return vabsq_f32 (__a) <= vabsq_f32 (__b);

13819 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13820 vcaleq_f64 (float64x2_t __a, float64x2_t __b)
13822 return vabsq_f64 (__a) <= vabsq_f64 (__b);
13825 /* vcalt */

/* Absolute compare strictly-less: |a| < |b|, all-ones/zero mask per
   lane; mirror image of vcagt.  */
13827 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13828 vcalt_f32 (float32x2_t __a, float32x2_t __b)
13830 return vabs_f32 (__a) < vabs_f32 (__b);

13833 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13834 vcalt_f64 (float64x1_t __a, float64x1_t __b)
13836 return vabs_f64 (__a) < vabs_f64 (__b);

13839 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
13840 vcaltd_f64 (float64_t __a, float64_t __b)
13842 return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;

13845 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13846 vcaltq_f32 (float32x4_t __a, float32x4_t __b)
13848 return vabsq_f32 (__a) < vabsq_f32 (__b);

13851 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13852 vcaltq_f64 (float64x2_t __a, float64x2_t __b)
13854 return vabsq_f64 (__a) < vabsq_f64 (__b);

13857 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
13858 vcalts_f32 (float32_t __a, float32_t __b)
13860 return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
13863 /* vceq - vector. */

/* Lane-wise equality: each lane becomes all-ones when equal, zero
   otherwise, via the cmeq builtins.  Unsigned and polynomial inputs
   are reinterpret-cast through the signed vector types the builtins
   are declared on.  The one-lane 64-bit forms build the mask with a
   scalar ternary (-1ll / 0ll).  */
13865 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13866 vceq_f32 (float32x2_t __a, float32x2_t __b)
13868 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);

13871 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13872 vceq_f64 (float64x1_t __a, float64x1_t __b)
13874 return (uint64x1_t) (__a == __b);

13877 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13878 vceq_p8 (poly8x8_t __a, poly8x8_t __b)
13880 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
13881 (int8x8_t) __b);

13884 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13885 vceq_s8 (int8x8_t __a, int8x8_t __b)
13887 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);

13890 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13891 vceq_s16 (int16x4_t __a, int16x4_t __b)
13893 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);

13896 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13897 vceq_s32 (int32x2_t __a, int32x2_t __b)
13899 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);

13902 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13903 vceq_s64 (int64x1_t __a, int64x1_t __b)
13905 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};

13908 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
13909 vceq_u8 (uint8x8_t __a, uint8x8_t __b)
13911 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
13912 (int8x8_t) __b);

13915 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
13916 vceq_u16 (uint16x4_t __a, uint16x4_t __b)
13918 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
13919 (int16x4_t) __b);

13922 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
13923 vceq_u32 (uint32x2_t __a, uint32x2_t __b)
13925 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
13926 (int32x2_t) __b);

13929 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
13930 vceq_u64 (uint64x1_t __a, uint64x1_t __b)
13932 return (uint64x1_t) {__a[0] == __b[0] ? -1ll : 0ll};

/* 128-bit (quad) variants.  */
13935 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13936 vceqq_f32 (float32x4_t __a, float32x4_t __b)
13938 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);

13941 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13942 vceqq_f64 (float64x2_t __a, float64x2_t __b)
13944 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);

13947 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13948 vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
13950 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
13951 (int8x16_t) __b);

13954 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13955 vceqq_s8 (int8x16_t __a, int8x16_t __b)
13957 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);

13960 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13961 vceqq_s16 (int16x8_t __a, int16x8_t __b)
13963 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);

13966 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13967 vceqq_s32 (int32x4_t __a, int32x4_t __b)
13969 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);

13972 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
13973 vceqq_s64 (int64x2_t __a, int64x2_t __b)
13975 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);

13978 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
13979 vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
13981 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
13982 (int8x16_t) __b);

13985 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
13986 vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
13988 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
13989 (int16x8_t) __b);

13992 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
13993 vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
13995 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
13996 (int32x4_t) __b);

13999 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14000 vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
14002 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
14003 (int64x2_t) __b);
14006 /* vceq - scalar. */

/* Scalar equality: return an all-ones value (-1 converted to the
   unsigned result type) when equal, otherwise 0.  */
14008 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14009 vceqs_f32 (float32_t __a, float32_t __b)
14011 return __a == __b ? -1 : 0;

14014 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14015 vceqd_s64 (int64_t __a, int64_t __b)
14017 return __a == __b ? -1ll : 0ll;

14020 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14021 vceqd_u64 (uint64_t __a, uint64_t __b)
14023 return __a == __b ? -1ll : 0ll;

14026 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14027 vceqd_f64 (float64_t __a, float64_t __b)
14029 return __a == __b ? -1ll : 0ll;
14032 /* vceqz - vector. */

/* Lane-wise compare-against-zero: builds an explicit zero vector and
   reuses the cmeq builtins, producing the usual all-ones/zero lane
   mask.  Unsigned/poly inputs cast through signed types as in vceq;
   one-lane 64-bit forms use a scalar ternary.  */
14034 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14035 vceqz_f32 (float32x2_t __a)
14037 float32x2_t __b = {0.0f, 0.0f};
14038 return (uint32x2_t) __builtin_aarch64_cmeqv2sf (__a, __b);

14041 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14042 vceqz_f64 (float64x1_t __a)
14044 return (uint64x1_t) (__a == (float64x1_t) {0.0});

14047 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14048 vceqz_p8 (poly8x8_t __a)
14050 poly8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14051 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14052 (int8x8_t) __b);

14055 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14056 vceqz_s8 (int8x8_t __a)
14058 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14059 return (uint8x8_t) __builtin_aarch64_cmeqv8qi (__a, __b);

14062 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14063 vceqz_s16 (int16x4_t __a)
14065 int16x4_t __b = {0, 0, 0, 0};
14066 return (uint16x4_t) __builtin_aarch64_cmeqv4hi (__a, __b);

14069 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14070 vceqz_s32 (int32x2_t __a)
14072 int32x2_t __b = {0, 0};
14073 return (uint32x2_t) __builtin_aarch64_cmeqv2si (__a, __b);

14076 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14077 vceqz_s64 (int64x1_t __a)
14079 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};

14082 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14083 vceqz_u8 (uint8x8_t __a)
14085 uint8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14086 return (uint8x8_t) __builtin_aarch64_cmeqv8qi ((int8x8_t) __a,
14087 (int8x8_t) __b);

14090 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14091 vceqz_u16 (uint16x4_t __a)
14093 uint16x4_t __b = {0, 0, 0, 0};
14094 return (uint16x4_t) __builtin_aarch64_cmeqv4hi ((int16x4_t) __a,
14095 (int16x4_t) __b);

14098 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14099 vceqz_u32 (uint32x2_t __a)
14101 uint32x2_t __b = {0, 0};
14102 return (uint32x2_t) __builtin_aarch64_cmeqv2si ((int32x2_t) __a,
14103 (int32x2_t) __b);

14106 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14107 vceqz_u64 (uint64x1_t __a)
14109 return (uint64x1_t) {__a[0] == 0ll ? -1ll : 0ll};

/* 128-bit (quad) variants.  */
14112 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14113 vceqzq_f32 (float32x4_t __a)
14115 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14116 return (uint32x4_t) __builtin_aarch64_cmeqv4sf (__a, __b);

14119 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14120 vceqzq_f64 (float64x2_t __a)
14122 float64x2_t __b = {0.0, 0.0};
14123 return (uint64x2_t) __builtin_aarch64_cmeqv2df (__a, __b);

14126 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14127 vceqzq_p8 (poly8x16_t __a)
14129 poly8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14130 0, 0, 0, 0, 0, 0, 0, 0};
14131 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14132 (int8x16_t) __b);

14135 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14136 vceqzq_s8 (int8x16_t __a)
14138 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14139 0, 0, 0, 0, 0, 0, 0, 0};
14140 return (uint8x16_t) __builtin_aarch64_cmeqv16qi (__a, __b);

14143 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14144 vceqzq_s16 (int16x8_t __a)
14146 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14147 return (uint16x8_t) __builtin_aarch64_cmeqv8hi (__a, __b);

14150 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14151 vceqzq_s32 (int32x4_t __a)
14153 int32x4_t __b = {0, 0, 0, 0};
14154 return (uint32x4_t) __builtin_aarch64_cmeqv4si (__a, __b);

14157 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14158 vceqzq_s64 (int64x2_t __a)
14160 int64x2_t __b = {0, 0};
14161 return (uint64x2_t) __builtin_aarch64_cmeqv2di (__a, __b);

14164 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14165 vceqzq_u8 (uint8x16_t __a)
14167 uint8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14168 0, 0, 0, 0, 0, 0, 0, 0};
14169 return (uint8x16_t) __builtin_aarch64_cmeqv16qi ((int8x16_t) __a,
14170 (int8x16_t) __b);

14173 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14174 vceqzq_u16 (uint16x8_t __a)
14176 uint16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14177 return (uint16x8_t) __builtin_aarch64_cmeqv8hi ((int16x8_t) __a,
14178 (int16x8_t) __b);

14181 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14182 vceqzq_u32 (uint32x4_t __a)
14184 uint32x4_t __b = {0, 0, 0, 0};
14185 return (uint32x4_t) __builtin_aarch64_cmeqv4si ((int32x4_t) __a,
14186 (int32x4_t) __b);

14189 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14190 vceqzq_u64 (uint64x2_t __a)
14192 uint64x2_t __b = {0, 0};
14193 return (uint64x2_t) __builtin_aarch64_cmeqv2di ((int64x2_t) __a,
14194 (int64x2_t) __b);
14197 /* vceqz - scalar. */
14199 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14200 vceqzs_f32 (float32_t __a)
14202 return __a == 0.0f ? -1 : 0;
/* All-ones when the signed 64-bit value equals zero, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_s64 (int64_t __a)
{
  if (__a == 0)
    return -1ll;
  return 0ll;
}
/* All-ones when the unsigned 64-bit value equals zero, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vceqzd_u64 (uint64_t __a)
{
  if (__a == 0)
    return -1ll;
  return 0ll;
}
14217 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14218 vceqzd_f64 (float64_t __a)
14220 return __a == 0.0 ? -1ll : 0ll;
14223 /* vcge - vector. */
14225 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14226 vcge_f32 (float32x2_t __a, float32x2_t __b)
14228 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14231 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14232 vcge_f64 (float64x1_t __a, float64x1_t __b)
14234 return (uint64x1_t) (__a >= __b);
14237 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14238 vcge_s8 (int8x8_t __a, int8x8_t __b)
14240 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14243 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14244 vcge_s16 (int16x4_t __a, int16x4_t __b)
14246 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
14249 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14250 vcge_s32 (int32x2_t __a, int32x2_t __b)
14252 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
14255 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14256 vcge_s64 (int64x1_t __a, int64x1_t __b)
14258 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
14261 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14262 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
14264 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __a,
14265 (int8x8_t) __b);
14268 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14269 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
14271 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __a,
14272 (int16x4_t) __b);
14275 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14276 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
14278 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __a,
14279 (int32x2_t) __b);
14282 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14283 vcge_u64 (uint64x1_t __a, uint64x1_t __b)
14285 return (uint64x1_t) {__a[0] >= __b[0] ? -1ll : 0ll};
14288 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14289 vcgeq_f32 (float32x4_t __a, float32x4_t __b)
14291 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14294 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14295 vcgeq_f64 (float64x2_t __a, float64x2_t __b)
14297 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14300 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14301 vcgeq_s8 (int8x16_t __a, int8x16_t __b)
14303 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14306 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14307 vcgeq_s16 (int16x8_t __a, int16x8_t __b)
14309 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
14312 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14313 vcgeq_s32 (int32x4_t __a, int32x4_t __b)
14315 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
14318 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14319 vcgeq_s64 (int64x2_t __a, int64x2_t __b)
14321 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
14324 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14325 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
14327 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __a,
14328 (int8x16_t) __b);
14331 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14332 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
14334 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __a,
14335 (int16x8_t) __b);
14338 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14339 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
14341 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __a,
14342 (int32x4_t) __b);
14345 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14346 vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
14348 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __a,
14349 (int64x2_t) __b);
14352 /* vcge - scalar. */
14354 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14355 vcges_f32 (float32_t __a, float32_t __b)
14357 return __a >= __b ? -1 : 0;
/* All-ones when __a >= __b (signed), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_s64 (int64_t __a, int64_t __b)
{
  if (__a >= __b)
    return -1ll;
  return 0ll;
}
/* All-ones when __a >= __b (unsigned), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcged_u64 (uint64_t __a, uint64_t __b)
{
  if (__a >= __b)
    return -1ll;
  return 0ll;
}
14372 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14373 vcged_f64 (float64_t __a, float64_t __b)
14375 return __a >= __b ? -1ll : 0ll;
14378 /* vcgez - vector. */
14380 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14381 vcgez_f32 (float32x2_t __a)
14383 float32x2_t __b = {0.0f, 0.0f};
14384 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__a, __b);
14387 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14388 vcgez_f64 (float64x1_t __a)
14390 return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
14393 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14394 vcgez_s8 (int8x8_t __a)
14396 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14397 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__a, __b);
14400 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14401 vcgez_s16 (int16x4_t __a)
14403 int16x4_t __b = {0, 0, 0, 0};
14404 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__a, __b);
14407 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14408 vcgez_s32 (int32x2_t __a)
14410 int32x2_t __b = {0, 0};
14411 return (uint32x2_t) __builtin_aarch64_cmgev2si (__a, __b);
14414 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14415 vcgez_s64 (int64x1_t __a)
14417 return (uint64x1_t) {__a[0] >= 0ll ? -1ll : 0ll};
14420 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14421 vcgezq_f32 (float32x4_t __a)
14423 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14424 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__a, __b);
14427 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14428 vcgezq_f64 (float64x2_t __a)
14430 float64x2_t __b = {0.0, 0.0};
14431 return (uint64x2_t) __builtin_aarch64_cmgev2df (__a, __b);
14434 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14435 vcgezq_s8 (int8x16_t __a)
14437 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14438 0, 0, 0, 0, 0, 0, 0, 0};
14439 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__a, __b);
14442 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14443 vcgezq_s16 (int16x8_t __a)
14445 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14446 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__a, __b);
14449 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14450 vcgezq_s32 (int32x4_t __a)
14452 int32x4_t __b = {0, 0, 0, 0};
14453 return (uint32x4_t) __builtin_aarch64_cmgev4si (__a, __b);
14456 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14457 vcgezq_s64 (int64x2_t __a)
14459 int64x2_t __b = {0, 0};
14460 return (uint64x2_t) __builtin_aarch64_cmgev2di (__a, __b);
14463 /* vcgez - scalar. */
14465 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14466 vcgezs_f32 (float32_t __a)
14468 return __a >= 0.0f ? -1 : 0;
/* All-ones when __a >= 0, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgezd_s64 (int64_t __a)
{
  if (__a >= 0)
    return -1ll;
  return 0ll;
}
14477 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14478 vcgezd_f64 (float64_t __a)
14480 return __a >= 0.0 ? -1ll : 0ll;
14483 /* vcgt - vector. */
14485 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14486 vcgt_f32 (float32x2_t __a, float32x2_t __b)
14488 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
14491 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14492 vcgt_f64 (float64x1_t __a, float64x1_t __b)
14494 return (uint64x1_t) (__a > __b);
14497 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14498 vcgt_s8 (int8x8_t __a, int8x8_t __b)
14500 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
14503 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14504 vcgt_s16 (int16x4_t __a, int16x4_t __b)
14506 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
14509 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14510 vcgt_s32 (int32x2_t __a, int32x2_t __b)
14512 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
14515 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14516 vcgt_s64 (int64x1_t __a, int64x1_t __b)
14518 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14521 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14522 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
14524 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __a,
14525 (int8x8_t) __b);
14528 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14529 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
14531 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __a,
14532 (int16x4_t) __b);
14535 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14536 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
14538 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __a,
14539 (int32x2_t) __b);
14542 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14543 vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
14545 return (uint64x1_t) (__a[0] > __b[0] ? -1ll : 0ll);
14548 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14549 vcgtq_f32 (float32x4_t __a, float32x4_t __b)
14551 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
14554 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14555 vcgtq_f64 (float64x2_t __a, float64x2_t __b)
14557 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
14560 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14561 vcgtq_s8 (int8x16_t __a, int8x16_t __b)
14563 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
14566 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14567 vcgtq_s16 (int16x8_t __a, int16x8_t __b)
14569 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
14572 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14573 vcgtq_s32 (int32x4_t __a, int32x4_t __b)
14575 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
14578 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14579 vcgtq_s64 (int64x2_t __a, int64x2_t __b)
14581 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
14584 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14585 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
14587 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __a,
14588 (int8x16_t) __b);
14591 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14592 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
14594 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __a,
14595 (int16x8_t) __b);
14598 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14599 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
14601 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __a,
14602 (int32x4_t) __b);
14605 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14606 vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
14608 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __a,
14609 (int64x2_t) __b);
14612 /* vcgt - scalar. */
14614 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14615 vcgts_f32 (float32_t __a, float32_t __b)
14617 return __a > __b ? -1 : 0;
/* All-ones when __a > __b (signed), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_s64 (int64_t __a, int64_t __b)
{
  if (__a > __b)
    return -1ll;
  return 0ll;
}
/* All-ones when __a > __b (unsigned), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtd_u64 (uint64_t __a, uint64_t __b)
{
  if (__a > __b)
    return -1ll;
  return 0ll;
}
14632 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14633 vcgtd_f64 (float64_t __a, float64_t __b)
14635 return __a > __b ? -1ll : 0ll;
14638 /* vcgtz - vector. */
14640 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14641 vcgtz_f32 (float32x2_t __a)
14643 float32x2_t __b = {0.0f, 0.0f};
14644 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__a, __b);
14647 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14648 vcgtz_f64 (float64x1_t __a)
14650 return (uint64x1_t) (__a > (float64x1_t) {0.0});
14653 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14654 vcgtz_s8 (int8x8_t __a)
14656 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14657 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__a, __b);
14660 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14661 vcgtz_s16 (int16x4_t __a)
14663 int16x4_t __b = {0, 0, 0, 0};
14664 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__a, __b);
14667 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14668 vcgtz_s32 (int32x2_t __a)
14670 int32x2_t __b = {0, 0};
14671 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__a, __b);
14674 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14675 vcgtz_s64 (int64x1_t __a)
14677 return (uint64x1_t) {__a[0] > 0ll ? -1ll : 0ll};
14680 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14681 vcgtzq_f32 (float32x4_t __a)
14683 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14684 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__a, __b);
14687 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14688 vcgtzq_f64 (float64x2_t __a)
14690 float64x2_t __b = {0.0, 0.0};
14691 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__a, __b);
14694 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14695 vcgtzq_s8 (int8x16_t __a)
14697 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14698 0, 0, 0, 0, 0, 0, 0, 0};
14699 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__a, __b);
14702 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14703 vcgtzq_s16 (int16x8_t __a)
14705 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14706 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__a, __b);
14709 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14710 vcgtzq_s32 (int32x4_t __a)
14712 int32x4_t __b = {0, 0, 0, 0};
14713 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__a, __b);
14716 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14717 vcgtzq_s64 (int64x2_t __a)
14719 int64x2_t __b = {0, 0};
14720 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__a, __b);
14723 /* vcgtz - scalar. */
14725 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14726 vcgtzs_f32 (float32_t __a)
14728 return __a > 0.0f ? -1 : 0;
/* All-ones when __a > 0, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcgtzd_s64 (int64_t __a)
{
  if (__a > 0)
    return -1ll;
  return 0ll;
}
14737 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14738 vcgtzd_f64 (float64_t __a)
14740 return __a > 0.0 ? -1ll : 0ll;
14743 /* vcle - vector. */
14745 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14746 vcle_f32 (float32x2_t __a, float32x2_t __b)
14748 return (uint32x2_t) __builtin_aarch64_cmgev2sf (__b, __a);
14751 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14752 vcle_f64 (float64x1_t __a, float64x1_t __b)
14754 return (uint64x1_t) (__a <= __b);
14757 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14758 vcle_s8 (int8x8_t __a, int8x8_t __b)
14760 return (uint8x8_t) __builtin_aarch64_cmgev8qi (__b, __a);
14763 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14764 vcle_s16 (int16x4_t __a, int16x4_t __b)
14766 return (uint16x4_t) __builtin_aarch64_cmgev4hi (__b, __a);
14769 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14770 vcle_s32 (int32x2_t __a, int32x2_t __b)
14772 return (uint32x2_t) __builtin_aarch64_cmgev2si (__b, __a);
14775 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14776 vcle_s64 (int64x1_t __a, int64x1_t __b)
14778 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14781 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14782 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
14784 return (uint8x8_t) __builtin_aarch64_cmgeuv8qi ((int8x8_t) __b,
14785 (int8x8_t) __a);
14788 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14789 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
14791 return (uint16x4_t) __builtin_aarch64_cmgeuv4hi ((int16x4_t) __b,
14792 (int16x4_t) __a);
14795 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14796 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
14798 return (uint32x2_t) __builtin_aarch64_cmgeuv2si ((int32x2_t) __b,
14799 (int32x2_t) __a);
14802 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14803 vcle_u64 (uint64x1_t __a, uint64x1_t __b)
14805 return (uint64x1_t) {__a[0] <= __b[0] ? -1ll : 0ll};
14808 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14809 vcleq_f32 (float32x4_t __a, float32x4_t __b)
14811 return (uint32x4_t) __builtin_aarch64_cmgev4sf (__b, __a);
14814 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14815 vcleq_f64 (float64x2_t __a, float64x2_t __b)
14817 return (uint64x2_t) __builtin_aarch64_cmgev2df (__b, __a);
14820 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14821 vcleq_s8 (int8x16_t __a, int8x16_t __b)
14823 return (uint8x16_t) __builtin_aarch64_cmgev16qi (__b, __a);
14826 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14827 vcleq_s16 (int16x8_t __a, int16x8_t __b)
14829 return (uint16x8_t) __builtin_aarch64_cmgev8hi (__b, __a);
14832 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14833 vcleq_s32 (int32x4_t __a, int32x4_t __b)
14835 return (uint32x4_t) __builtin_aarch64_cmgev4si (__b, __a);
14838 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14839 vcleq_s64 (int64x2_t __a, int64x2_t __b)
14841 return (uint64x2_t) __builtin_aarch64_cmgev2di (__b, __a);
14844 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14845 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
14847 return (uint8x16_t) __builtin_aarch64_cmgeuv16qi ((int8x16_t) __b,
14848 (int8x16_t) __a);
14851 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14852 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
14854 return (uint16x8_t) __builtin_aarch64_cmgeuv8hi ((int16x8_t) __b,
14855 (int16x8_t) __a);
14858 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14859 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
14861 return (uint32x4_t) __builtin_aarch64_cmgeuv4si ((int32x4_t) __b,
14862 (int32x4_t) __a);
14865 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14866 vcleq_u64 (uint64x2_t __a, uint64x2_t __b)
14868 return (uint64x2_t) __builtin_aarch64_cmgeuv2di ((int64x2_t) __b,
14869 (int64x2_t) __a);
14872 /* vcle - scalar. */
14874 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14875 vcles_f32 (float32_t __a, float32_t __b)
14877 return __a <= __b ? -1 : 0;
/* All-ones when __a <= __b (signed), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_s64 (int64_t __a, int64_t __b)
{
  if (__a <= __b)
    return -1ll;
  return 0ll;
}
/* All-ones when __a <= __b (unsigned), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcled_u64 (uint64_t __a, uint64_t __b)
{
  if (__a <= __b)
    return -1ll;
  return 0ll;
}
14892 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14893 vcled_f64 (float64_t __a, float64_t __b)
14895 return __a <= __b ? -1ll : 0ll;
14898 /* vclez - vector. */
14900 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14901 vclez_f32 (float32x2_t __a)
14903 float32x2_t __b = {0.0f, 0.0f};
14904 return (uint32x2_t) __builtin_aarch64_cmlev2sf (__a, __b);
14907 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14908 vclez_f64 (float64x1_t __a)
14910 return (uint64x1_t) (__a <= (float64x1_t) {0.0});
14913 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
14914 vclez_s8 (int8x8_t __a)
14916 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14917 return (uint8x8_t) __builtin_aarch64_cmlev8qi (__a, __b);
14920 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
14921 vclez_s16 (int16x4_t __a)
14923 int16x4_t __b = {0, 0, 0, 0};
14924 return (uint16x4_t) __builtin_aarch64_cmlev4hi (__a, __b);
14927 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
14928 vclez_s32 (int32x2_t __a)
14930 int32x2_t __b = {0, 0};
14931 return (uint32x2_t) __builtin_aarch64_cmlev2si (__a, __b);
14934 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
14935 vclez_s64 (int64x1_t __a)
14937 return (uint64x1_t) {__a[0] <= 0ll ? -1ll : 0ll};
14940 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14941 vclezq_f32 (float32x4_t __a)
14943 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
14944 return (uint32x4_t) __builtin_aarch64_cmlev4sf (__a, __b);
14947 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14948 vclezq_f64 (float64x2_t __a)
14950 float64x2_t __b = {0.0, 0.0};
14951 return (uint64x2_t) __builtin_aarch64_cmlev2df (__a, __b);
14954 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
14955 vclezq_s8 (int8x16_t __a)
14957 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
14958 0, 0, 0, 0, 0, 0, 0, 0};
14959 return (uint8x16_t) __builtin_aarch64_cmlev16qi (__a, __b);
14962 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
14963 vclezq_s16 (int16x8_t __a)
14965 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
14966 return (uint16x8_t) __builtin_aarch64_cmlev8hi (__a, __b);
14969 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
14970 vclezq_s32 (int32x4_t __a)
14972 int32x4_t __b = {0, 0, 0, 0};
14973 return (uint32x4_t) __builtin_aarch64_cmlev4si (__a, __b);
14976 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
14977 vclezq_s64 (int64x2_t __a)
14979 int64x2_t __b = {0, 0};
14980 return (uint64x2_t) __builtin_aarch64_cmlev2di (__a, __b);
14983 /* vclez - scalar. */
14985 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
14986 vclezs_f32 (float32_t __a)
14988 return __a <= 0.0f ? -1 : 0;
/* All-ones when __a <= 0, otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vclezd_s64 (int64_t __a)
{
  if (__a <= 0)
    return -1ll;
  return 0ll;
}
14997 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
14998 vclezd_f64 (float64_t __a)
15000 return __a <= 0.0 ? -1ll : 0ll;
15003 /* vclt - vector. */
15005 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15006 vclt_f32 (float32x2_t __a, float32x2_t __b)
15008 return (uint32x2_t) __builtin_aarch64_cmgtv2sf (__b, __a);
15011 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15012 vclt_f64 (float64x1_t __a, float64x1_t __b)
15014 return (uint64x1_t) (__a < __b);
15017 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15018 vclt_s8 (int8x8_t __a, int8x8_t __b)
15020 return (uint8x8_t) __builtin_aarch64_cmgtv8qi (__b, __a);
15023 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15024 vclt_s16 (int16x4_t __a, int16x4_t __b)
15026 return (uint16x4_t) __builtin_aarch64_cmgtv4hi (__b, __a);
15029 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15030 vclt_s32 (int32x2_t __a, int32x2_t __b)
15032 return (uint32x2_t) __builtin_aarch64_cmgtv2si (__b, __a);
15035 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15036 vclt_s64 (int64x1_t __a, int64x1_t __b)
15038 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
15041 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15042 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
15044 return (uint8x8_t) __builtin_aarch64_cmgtuv8qi ((int8x8_t) __b,
15045 (int8x8_t) __a);
15048 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15049 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
15051 return (uint16x4_t) __builtin_aarch64_cmgtuv4hi ((int16x4_t) __b,
15052 (int16x4_t) __a);
15055 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15056 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
15058 return (uint32x2_t) __builtin_aarch64_cmgtuv2si ((int32x2_t) __b,
15059 (int32x2_t) __a);
15062 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15063 vclt_u64 (uint64x1_t __a, uint64x1_t __b)
15065 return (uint64x1_t) {__a[0] < __b[0] ? -1ll : 0ll};
15068 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15069 vcltq_f32 (float32x4_t __a, float32x4_t __b)
15071 return (uint32x4_t) __builtin_aarch64_cmgtv4sf (__b, __a);
15074 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15075 vcltq_f64 (float64x2_t __a, float64x2_t __b)
15077 return (uint64x2_t) __builtin_aarch64_cmgtv2df (__b, __a);
15080 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15081 vcltq_s8 (int8x16_t __a, int8x16_t __b)
15083 return (uint8x16_t) __builtin_aarch64_cmgtv16qi (__b, __a);
15086 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15087 vcltq_s16 (int16x8_t __a, int16x8_t __b)
15089 return (uint16x8_t) __builtin_aarch64_cmgtv8hi (__b, __a);
15092 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15093 vcltq_s32 (int32x4_t __a, int32x4_t __b)
15095 return (uint32x4_t) __builtin_aarch64_cmgtv4si (__b, __a);
15098 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15099 vcltq_s64 (int64x2_t __a, int64x2_t __b)
15101 return (uint64x2_t) __builtin_aarch64_cmgtv2di (__b, __a);
15104 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15105 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
15107 return (uint8x16_t) __builtin_aarch64_cmgtuv16qi ((int8x16_t) __b,
15108 (int8x16_t) __a);
15111 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15112 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
15114 return (uint16x8_t) __builtin_aarch64_cmgtuv8hi ((int16x8_t) __b,
15115 (int16x8_t) __a);
15118 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15119 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
15121 return (uint32x4_t) __builtin_aarch64_cmgtuv4si ((int32x4_t) __b,
15122 (int32x4_t) __a);
15125 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15126 vcltq_u64 (uint64x2_t __a, uint64x2_t __b)
15128 return (uint64x2_t) __builtin_aarch64_cmgtuv2di ((int64x2_t) __b,
15129 (int64x2_t) __a);
15132 /* vclt - scalar. */
15134 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15135 vclts_f32 (float32_t __a, float32_t __b)
15137 return __a < __b ? -1 : 0;
/* All-ones when __a < __b (signed), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_s64 (int64_t __a, int64_t __b)
{
  if (__a < __b)
    return -1ll;
  return 0ll;
}
/* All-ones when __a < __b (unsigned), otherwise zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vcltd_u64 (uint64_t __a, uint64_t __b)
{
  if (__a < __b)
    return -1ll;
  return 0ll;
}
15152 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15153 vcltd_f64 (float64_t __a, float64_t __b)
15155 return __a < __b ? -1ll : 0ll;
15158 /* vcltz - vector. */
15160 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15161 vcltz_f32 (float32x2_t __a)
15163 float32x2_t __b = {0.0f, 0.0f};
15164 return (uint32x2_t) __builtin_aarch64_cmltv2sf (__a, __b);
15167 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15168 vcltz_f64 (float64x1_t __a)
15170 return (uint64x1_t) (__a < (float64x1_t) {0.0});
15173 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15174 vcltz_s8 (int8x8_t __a)
15176 int8x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15177 return (uint8x8_t) __builtin_aarch64_cmltv8qi (__a, __b);
15180 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15181 vcltz_s16 (int16x4_t __a)
15183 int16x4_t __b = {0, 0, 0, 0};
15184 return (uint16x4_t) __builtin_aarch64_cmltv4hi (__a, __b);
15187 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15188 vcltz_s32 (int32x2_t __a)
15190 int32x2_t __b = {0, 0};
15191 return (uint32x2_t) __builtin_aarch64_cmltv2si (__a, __b);
15194 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15195 vcltz_s64 (int64x1_t __a)
15197 return (uint64x1_t) {__a[0] < 0ll ? -1ll : 0ll};
15200 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15201 vcltzq_f32 (float32x4_t __a)
15203 float32x4_t __b = {0.0f, 0.0f, 0.0f, 0.0f};
15204 return (uint32x4_t) __builtin_aarch64_cmltv4sf (__a, __b);
15207 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15208 vcltzq_f64 (float64x2_t __a)
15210 float64x2_t __b = {0.0, 0.0};
15211 return (uint64x2_t) __builtin_aarch64_cmltv2df (__a, __b);
15214 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15215 vcltzq_s8 (int8x16_t __a)
15217 int8x16_t __b = {0, 0, 0, 0, 0, 0, 0, 0,
15218 0, 0, 0, 0, 0, 0, 0, 0};
15219 return (uint8x16_t) __builtin_aarch64_cmltv16qi (__a, __b);
15222 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15223 vcltzq_s16 (int16x8_t __a)
15225 int16x8_t __b = {0, 0, 0, 0, 0, 0, 0, 0};
15226 return (uint16x8_t) __builtin_aarch64_cmltv8hi (__a, __b);
15229 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15230 vcltzq_s32 (int32x4_t __a)
15232 int32x4_t __b = {0, 0, 0, 0};
15233 return (uint32x4_t) __builtin_aarch64_cmltv4si (__a, __b);
15236 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15237 vcltzq_s64 (int64x2_t __a)
15239 int64x2_t __b = {0, 0};
15240 return (uint64x2_t) __builtin_aarch64_cmltv2di (__a, __b);
15243 /* vcltz - scalar. */
15245 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15246 vcltzs_f32 (float32_t __a)
15248 return __a < 0.0f ? -1 : 0;
15251 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15252 vcltzd_s64 (int64_t __a)
15254 return __a < 0 ? -1ll : 0ll;
15257 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15258 vcltzd_f64 (float64_t __a)
15260 return __a < 0.0 ? -1ll : 0ll;
15263 /* vclz. */
15265 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15266 vclz_s8 (int8x8_t __a)
15268 return __builtin_aarch64_clzv8qi (__a);
15271 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15272 vclz_s16 (int16x4_t __a)
15274 return __builtin_aarch64_clzv4hi (__a);
15277 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15278 vclz_s32 (int32x2_t __a)
15280 return __builtin_aarch64_clzv2si (__a);
15283 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15284 vclz_u8 (uint8x8_t __a)
15286 return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a);
15289 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15290 vclz_u16 (uint16x4_t __a)
15292 return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a);
15295 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15296 vclz_u32 (uint32x2_t __a)
15298 return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a);
15301 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15302 vclzq_s8 (int8x16_t __a)
15304 return __builtin_aarch64_clzv16qi (__a);
15307 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15308 vclzq_s16 (int16x8_t __a)
15310 return __builtin_aarch64_clzv8hi (__a);
15313 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15314 vclzq_s32 (int32x4_t __a)
15316 return __builtin_aarch64_clzv4si (__a);
15319 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15320 vclzq_u8 (uint8x16_t __a)
15322 return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a);
15325 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15326 vclzq_u16 (uint16x8_t __a)
15328 return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a);
15331 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15332 vclzq_u32 (uint32x4_t __a)
15334 return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a);
15337 /* vcvt (double -> float). */
15339 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15340 vcvt_f32_f64 (float64x2_t __a)
15342 return __builtin_aarch64_float_truncate_lo_v2sf (__a);
15345 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15346 vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
15348 return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
15351 /* vcvt (float -> double). */
15353 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15354 vcvt_f64_f32 (float32x2_t __a)
15357 return __builtin_aarch64_float_extend_lo_v2df (__a);
15360 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15361 vcvt_high_f64_f32 (float32x4_t __a)
15363 return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
15366 /* vcvt (<u>int -> float) */
15368 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15369 vcvtd_f64_s64 (int64_t __a)
15371 return (float64_t) __a;
15374 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
15375 vcvtd_f64_u64 (uint64_t __a)
15377 return (float64_t) __a;
15380 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15381 vcvts_f32_s32 (int32_t __a)
15383 return (float32_t) __a;
15386 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
15387 vcvts_f32_u32 (uint32_t __a)
15389 return (float32_t) __a;
15392 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15393 vcvt_f32_s32 (int32x2_t __a)
15395 return __builtin_aarch64_floatv2siv2sf (__a);
15398 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15399 vcvt_f32_u32 (uint32x2_t __a)
15401 return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
15404 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15405 vcvtq_f32_s32 (int32x4_t __a)
15407 return __builtin_aarch64_floatv4siv4sf (__a);
15410 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15411 vcvtq_f32_u32 (uint32x4_t __a)
15413 return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
15416 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15417 vcvtq_f64_s64 (int64x2_t __a)
15419 return __builtin_aarch64_floatv2div2df (__a);
15422 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15423 vcvtq_f64_u64 (uint64x2_t __a)
15425 return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
15428 /* vcvt (float -> <u>int) */
15430 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15431 vcvtd_s64_f64 (float64_t __a)
15433 return (int64_t) __a;
15436 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15437 vcvtd_u64_f64 (float64_t __a)
15439 return (uint64_t) __a;
15442 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15443 vcvts_s32_f32 (float32_t __a)
15445 return (int32_t) __a;
15448 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15449 vcvts_u32_f32 (float32_t __a)
15451 return (uint32_t) __a;
15454 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15455 vcvt_s32_f32 (float32x2_t __a)
15457 return __builtin_aarch64_lbtruncv2sfv2si (__a);
15460 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15461 vcvt_u32_f32 (float32x2_t __a)
15463 /* TODO: This cast should go away when builtins have
15464 their correct types. */
15465 return (uint32x2_t) __builtin_aarch64_lbtruncuv2sfv2si (__a);
15468 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15469 vcvtq_s32_f32 (float32x4_t __a)
15471 return __builtin_aarch64_lbtruncv4sfv4si (__a);
15474 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15475 vcvtq_u32_f32 (float32x4_t __a)
15477 /* TODO: This cast should go away when builtins have
15478 their correct types. */
15479 return (uint32x4_t) __builtin_aarch64_lbtruncuv4sfv4si (__a);
15482 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15483 vcvtq_s64_f64 (float64x2_t __a)
15485 return __builtin_aarch64_lbtruncv2dfv2di (__a);
15488 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15489 vcvtq_u64_f64 (float64x2_t __a)
15491 /* TODO: This cast should go away when builtins have
15492 their correct types. */
15493 return (uint64x2_t) __builtin_aarch64_lbtruncuv2dfv2di (__a);
15496 /* vcvta */
15498 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15499 vcvtad_s64_f64 (float64_t __a)
15501 return __builtin_aarch64_lrounddfdi (__a);
15504 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15505 vcvtad_u64_f64 (float64_t __a)
15507 return __builtin_aarch64_lroundudfdi (__a);
15510 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15511 vcvtas_s32_f32 (float32_t __a)
15513 return __builtin_aarch64_lroundsfsi (__a);
15516 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15517 vcvtas_u32_f32 (float32_t __a)
15519 return __builtin_aarch64_lroundusfsi (__a);
15522 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15523 vcvta_s32_f32 (float32x2_t __a)
15525 return __builtin_aarch64_lroundv2sfv2si (__a);
15528 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15529 vcvta_u32_f32 (float32x2_t __a)
15531 /* TODO: This cast should go away when builtins have
15532 their correct types. */
15533 return (uint32x2_t) __builtin_aarch64_lrounduv2sfv2si (__a);
15536 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15537 vcvtaq_s32_f32 (float32x4_t __a)
15539 return __builtin_aarch64_lroundv4sfv4si (__a);
15542 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15543 vcvtaq_u32_f32 (float32x4_t __a)
15545 /* TODO: This cast should go away when builtins have
15546 their correct types. */
15547 return (uint32x4_t) __builtin_aarch64_lrounduv4sfv4si (__a);
15550 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15551 vcvtaq_s64_f64 (float64x2_t __a)
15553 return __builtin_aarch64_lroundv2dfv2di (__a);
15556 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15557 vcvtaq_u64_f64 (float64x2_t __a)
15559 /* TODO: This cast should go away when builtins have
15560 their correct types. */
15561 return (uint64x2_t) __builtin_aarch64_lrounduv2dfv2di (__a);
15564 /* vcvtm */
15566 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15567 vcvtmd_s64_f64 (float64_t __a)
15569 return __builtin_llfloor (__a);
15572 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15573 vcvtmd_u64_f64 (float64_t __a)
15575 return __builtin_aarch64_lfloorudfdi (__a);
15578 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15579 vcvtms_s32_f32 (float32_t __a)
15581 return __builtin_ifloorf (__a);
15584 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15585 vcvtms_u32_f32 (float32_t __a)
15587 return __builtin_aarch64_lfloorusfsi (__a);
15590 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15591 vcvtm_s32_f32 (float32x2_t __a)
15593 return __builtin_aarch64_lfloorv2sfv2si (__a);
15596 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15597 vcvtm_u32_f32 (float32x2_t __a)
15599 /* TODO: This cast should go away when builtins have
15600 their correct types. */
15601 return (uint32x2_t) __builtin_aarch64_lflooruv2sfv2si (__a);
15604 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15605 vcvtmq_s32_f32 (float32x4_t __a)
15607 return __builtin_aarch64_lfloorv4sfv4si (__a);
15610 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15611 vcvtmq_u32_f32 (float32x4_t __a)
15613 /* TODO: This cast should go away when builtins have
15614 their correct types. */
15615 return (uint32x4_t) __builtin_aarch64_lflooruv4sfv4si (__a);
15618 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15619 vcvtmq_s64_f64 (float64x2_t __a)
15621 return __builtin_aarch64_lfloorv2dfv2di (__a);
15624 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15625 vcvtmq_u64_f64 (float64x2_t __a)
15627 /* TODO: This cast should go away when builtins have
15628 their correct types. */
15629 return (uint64x2_t) __builtin_aarch64_lflooruv2dfv2di (__a);
15632 /* vcvtn */
15634 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15635 vcvtnd_s64_f64 (float64_t __a)
15637 return __builtin_aarch64_lfrintndfdi (__a);
15640 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15641 vcvtnd_u64_f64 (float64_t __a)
15643 return __builtin_aarch64_lfrintnudfdi (__a);
15646 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15647 vcvtns_s32_f32 (float32_t __a)
15649 return __builtin_aarch64_lfrintnsfsi (__a);
15652 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15653 vcvtns_u32_f32 (float32_t __a)
15655 return __builtin_aarch64_lfrintnusfsi (__a);
15658 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15659 vcvtn_s32_f32 (float32x2_t __a)
15661 return __builtin_aarch64_lfrintnv2sfv2si (__a);
15664 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15665 vcvtn_u32_f32 (float32x2_t __a)
15667 /* TODO: This cast should go away when builtins have
15668 their correct types. */
15669 return (uint32x2_t) __builtin_aarch64_lfrintnuv2sfv2si (__a);
15672 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15673 vcvtnq_s32_f32 (float32x4_t __a)
15675 return __builtin_aarch64_lfrintnv4sfv4si (__a);
15678 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15679 vcvtnq_u32_f32 (float32x4_t __a)
15681 /* TODO: This cast should go away when builtins have
15682 their correct types. */
15683 return (uint32x4_t) __builtin_aarch64_lfrintnuv4sfv4si (__a);
15686 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15687 vcvtnq_s64_f64 (float64x2_t __a)
15689 return __builtin_aarch64_lfrintnv2dfv2di (__a);
15692 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15693 vcvtnq_u64_f64 (float64x2_t __a)
15695 /* TODO: This cast should go away when builtins have
15696 their correct types. */
15697 return (uint64x2_t) __builtin_aarch64_lfrintnuv2dfv2di (__a);
15700 /* vcvtp */
15702 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
15703 vcvtpd_s64_f64 (float64_t __a)
15705 return __builtin_llceil (__a);
15708 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
15709 vcvtpd_u64_f64 (float64_t __a)
15711 return __builtin_aarch64_lceiludfdi (__a);
15714 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
15715 vcvtps_s32_f32 (float32_t __a)
15717 return __builtin_iceilf (__a);
15720 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
15721 vcvtps_u32_f32 (float32_t __a)
15723 return __builtin_aarch64_lceilusfsi (__a);
15726 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15727 vcvtp_s32_f32 (float32x2_t __a)
15729 return __builtin_aarch64_lceilv2sfv2si (__a);
15732 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15733 vcvtp_u32_f32 (float32x2_t __a)
15735 /* TODO: This cast should go away when builtins have
15736 their correct types. */
15737 return (uint32x2_t) __builtin_aarch64_lceiluv2sfv2si (__a);
15740 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15741 vcvtpq_s32_f32 (float32x4_t __a)
15743 return __builtin_aarch64_lceilv4sfv4si (__a);
15746 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15747 vcvtpq_u32_f32 (float32x4_t __a)
15749 /* TODO: This cast should go away when builtins have
15750 their correct types. */
15751 return (uint32x4_t) __builtin_aarch64_lceiluv4sfv4si (__a);
15754 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15755 vcvtpq_s64_f64 (float64x2_t __a)
15757 return __builtin_aarch64_lceilv2dfv2di (__a);
15760 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15761 vcvtpq_u64_f64 (float64x2_t __a)
15763 /* TODO: This cast should go away when builtins have
15764 their correct types. */
15765 return (uint64x2_t) __builtin_aarch64_lceiluv2dfv2di (__a);
15768 /* vdup_n */
15770 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15771 vdup_n_f32 (float32_t __a)
15773 return (float32x2_t) {__a, __a};
15776 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15777 vdup_n_f64 (float64_t __a)
15779 return (float64x1_t) {__a};
15782 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15783 vdup_n_p8 (poly8_t __a)
15785 return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15788 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15789 vdup_n_p16 (poly16_t __a)
15791 return (poly16x4_t) {__a, __a, __a, __a};
15794 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15795 vdup_n_s8 (int8_t __a)
15797 return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15800 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15801 vdup_n_s16 (int16_t __a)
15803 return (int16x4_t) {__a, __a, __a, __a};
15806 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15807 vdup_n_s32 (int32_t __a)
15809 return (int32x2_t) {__a, __a};
15812 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15813 vdup_n_s64 (int64_t __a)
15815 return (int64x1_t) {__a};
15818 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15819 vdup_n_u8 (uint8_t __a)
15821 return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15824 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15825 vdup_n_u16 (uint16_t __a)
15827 return (uint16x4_t) {__a, __a, __a, __a};
15830 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15831 vdup_n_u32 (uint32_t __a)
15833 return (uint32x2_t) {__a, __a};
15836 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15837 vdup_n_u64 (uint64_t __a)
15839 return (uint64x1_t) {__a};
15842 /* vdupq_n */
15844 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
15845 vdupq_n_f32 (float32_t __a)
15847 return (float32x4_t) {__a, __a, __a, __a};
15850 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
15851 vdupq_n_f64 (float64_t __a)
15853 return (float64x2_t) {__a, __a};
15856 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
15857 vdupq_n_p8 (uint32_t __a)
15859 return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
15860 __a, __a, __a, __a, __a, __a, __a, __a};
15863 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
15864 vdupq_n_p16 (uint32_t __a)
15866 return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15869 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
15870 vdupq_n_s8 (int32_t __a)
15872 return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
15873 __a, __a, __a, __a, __a, __a, __a, __a};
15876 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
15877 vdupq_n_s16 (int32_t __a)
15879 return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15882 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
15883 vdupq_n_s32 (int32_t __a)
15885 return (int32x4_t) {__a, __a, __a, __a};
15888 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
15889 vdupq_n_s64 (int64_t __a)
15891 return (int64x2_t) {__a, __a};
15894 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
15895 vdupq_n_u8 (uint32_t __a)
15897 return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
15898 __a, __a, __a, __a, __a, __a, __a, __a};
15901 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
15902 vdupq_n_u16 (uint32_t __a)
15904 return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
15907 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
15908 vdupq_n_u32 (uint32_t __a)
15910 return (uint32x4_t) {__a, __a, __a, __a};
15913 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
15914 vdupq_n_u64 (uint64_t __a)
15916 return (uint64x2_t) {__a, __a};
15919 /* vdup_lane */
15921 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15922 vdup_lane_f32 (float32x2_t __a, const int __b)
15924 return __aarch64_vdup_lane_f32 (__a, __b);
15927 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
15928 vdup_lane_f64 (float64x1_t __a, const int __b)
15930 return __aarch64_vdup_lane_f64 (__a, __b);
15933 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
15934 vdup_lane_p8 (poly8x8_t __a, const int __b)
15936 return __aarch64_vdup_lane_p8 (__a, __b);
15939 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
15940 vdup_lane_p16 (poly16x4_t __a, const int __b)
15942 return __aarch64_vdup_lane_p16 (__a, __b);
15945 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
15946 vdup_lane_s8 (int8x8_t __a, const int __b)
15948 return __aarch64_vdup_lane_s8 (__a, __b);
15951 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
15952 vdup_lane_s16 (int16x4_t __a, const int __b)
15954 return __aarch64_vdup_lane_s16 (__a, __b);
15957 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
15958 vdup_lane_s32 (int32x2_t __a, const int __b)
15960 return __aarch64_vdup_lane_s32 (__a, __b);
15963 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
15964 vdup_lane_s64 (int64x1_t __a, const int __b)
15966 return __aarch64_vdup_lane_s64 (__a, __b);
15969 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
15970 vdup_lane_u8 (uint8x8_t __a, const int __b)
15972 return __aarch64_vdup_lane_u8 (__a, __b);
15975 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
15976 vdup_lane_u16 (uint16x4_t __a, const int __b)
15978 return __aarch64_vdup_lane_u16 (__a, __b);
15981 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
15982 vdup_lane_u32 (uint32x2_t __a, const int __b)
15984 return __aarch64_vdup_lane_u32 (__a, __b);
15987 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
15988 vdup_lane_u64 (uint64x1_t __a, const int __b)
15990 return __aarch64_vdup_lane_u64 (__a, __b);
15993 /* vdup_laneq */
15995 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
15996 vdup_laneq_f32 (float32x4_t __a, const int __b)
15998 return __aarch64_vdup_laneq_f32 (__a, __b);
16001 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16002 vdup_laneq_f64 (float64x2_t __a, const int __b)
16004 return __aarch64_vdup_laneq_f64 (__a, __b);
16007 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16008 vdup_laneq_p8 (poly8x16_t __a, const int __b)
16010 return __aarch64_vdup_laneq_p8 (__a, __b);
16013 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16014 vdup_laneq_p16 (poly16x8_t __a, const int __b)
16016 return __aarch64_vdup_laneq_p16 (__a, __b);
16019 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16020 vdup_laneq_s8 (int8x16_t __a, const int __b)
16022 return __aarch64_vdup_laneq_s8 (__a, __b);
16025 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16026 vdup_laneq_s16 (int16x8_t __a, const int __b)
16028 return __aarch64_vdup_laneq_s16 (__a, __b);
16031 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16032 vdup_laneq_s32 (int32x4_t __a, const int __b)
16034 return __aarch64_vdup_laneq_s32 (__a, __b);
16037 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16038 vdup_laneq_s64 (int64x2_t __a, const int __b)
16040 return __aarch64_vdup_laneq_s64 (__a, __b);
16043 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16044 vdup_laneq_u8 (uint8x16_t __a, const int __b)
16046 return __aarch64_vdup_laneq_u8 (__a, __b);
16049 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16050 vdup_laneq_u16 (uint16x8_t __a, const int __b)
16052 return __aarch64_vdup_laneq_u16 (__a, __b);
16055 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16056 vdup_laneq_u32 (uint32x4_t __a, const int __b)
16058 return __aarch64_vdup_laneq_u32 (__a, __b);
16061 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16062 vdup_laneq_u64 (uint64x2_t __a, const int __b)
16064 return __aarch64_vdup_laneq_u64 (__a, __b);
16067 /* vdupq_lane */
16068 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16069 vdupq_lane_f32 (float32x2_t __a, const int __b)
16071 return __aarch64_vdupq_lane_f32 (__a, __b);
16074 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16075 vdupq_lane_f64 (float64x1_t __a, const int __b)
16077 return __aarch64_vdupq_lane_f64 (__a, __b);
16080 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16081 vdupq_lane_p8 (poly8x8_t __a, const int __b)
16083 return __aarch64_vdupq_lane_p8 (__a, __b);
16086 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16087 vdupq_lane_p16 (poly16x4_t __a, const int __b)
16089 return __aarch64_vdupq_lane_p16 (__a, __b);
16092 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16093 vdupq_lane_s8 (int8x8_t __a, const int __b)
16095 return __aarch64_vdupq_lane_s8 (__a, __b);
16098 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16099 vdupq_lane_s16 (int16x4_t __a, const int __b)
16101 return __aarch64_vdupq_lane_s16 (__a, __b);
16104 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16105 vdupq_lane_s32 (int32x2_t __a, const int __b)
16107 return __aarch64_vdupq_lane_s32 (__a, __b);
16110 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16111 vdupq_lane_s64 (int64x1_t __a, const int __b)
16113 return __aarch64_vdupq_lane_s64 (__a, __b);
16116 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16117 vdupq_lane_u8 (uint8x8_t __a, const int __b)
16119 return __aarch64_vdupq_lane_u8 (__a, __b);
16122 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16123 vdupq_lane_u16 (uint16x4_t __a, const int __b)
16125 return __aarch64_vdupq_lane_u16 (__a, __b);
16128 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16129 vdupq_lane_u32 (uint32x2_t __a, const int __b)
16131 return __aarch64_vdupq_lane_u32 (__a, __b);
16134 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16135 vdupq_lane_u64 (uint64x1_t __a, const int __b)
16137 return __aarch64_vdupq_lane_u64 (__a, __b);
16140 /* vdupq_laneq */
16141 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16142 vdupq_laneq_f32 (float32x4_t __a, const int __b)
16144 return __aarch64_vdupq_laneq_f32 (__a, __b);
16147 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16148 vdupq_laneq_f64 (float64x2_t __a, const int __b)
16150 return __aarch64_vdupq_laneq_f64 (__a, __b);
16153 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16154 vdupq_laneq_p8 (poly8x16_t __a, const int __b)
16156 return __aarch64_vdupq_laneq_p8 (__a, __b);
16159 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16160 vdupq_laneq_p16 (poly16x8_t __a, const int __b)
16162 return __aarch64_vdupq_laneq_p16 (__a, __b);
16165 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16166 vdupq_laneq_s8 (int8x16_t __a, const int __b)
16168 return __aarch64_vdupq_laneq_s8 (__a, __b);
16171 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16172 vdupq_laneq_s16 (int16x8_t __a, const int __b)
16174 return __aarch64_vdupq_laneq_s16 (__a, __b);
16177 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16178 vdupq_laneq_s32 (int32x4_t __a, const int __b)
16180 return __aarch64_vdupq_laneq_s32 (__a, __b);
16183 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16184 vdupq_laneq_s64 (int64x2_t __a, const int __b)
16186 return __aarch64_vdupq_laneq_s64 (__a, __b);
16189 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16190 vdupq_laneq_u8 (uint8x16_t __a, const int __b)
16192 return __aarch64_vdupq_laneq_u8 (__a, __b);
16195 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16196 vdupq_laneq_u16 (uint16x8_t __a, const int __b)
16198 return __aarch64_vdupq_laneq_u16 (__a, __b);
16201 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16202 vdupq_laneq_u32 (uint32x4_t __a, const int __b)
16204 return __aarch64_vdupq_laneq_u32 (__a, __b);
16207 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16208 vdupq_laneq_u64 (uint64x2_t __a, const int __b)
16210 return __aarch64_vdupq_laneq_u64 (__a, __b);
16213 /* vdupb_lane */
16214 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
16215 vdupb_lane_p8 (poly8x8_t __a, const int __b)
16217 return __aarch64_vget_lane_p8 (__a, __b);
16220 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
16221 vdupb_lane_s8 (int8x8_t __a, const int __b)
16223 return __aarch64_vget_lane_s8 (__a, __b);
16226 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16227 vdupb_lane_u8 (uint8x8_t __a, const int __b)
16229 return __aarch64_vget_lane_u8 (__a, __b);
16232 /* vduph_lane */
16233 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
16234 vduph_lane_p16 (poly16x4_t __a, const int __b)
16236 return __aarch64_vget_lane_p16 (__a, __b);
16239 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
16240 vduph_lane_s16 (int16x4_t __a, const int __b)
16242 return __aarch64_vget_lane_s16 (__a, __b);
16245 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16246 vduph_lane_u16 (uint16x4_t __a, const int __b)
16248 return __aarch64_vget_lane_u16 (__a, __b);
16251 /* vdups_lane */
16252 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16253 vdups_lane_f32 (float32x2_t __a, const int __b)
16255 return __aarch64_vget_lane_f32 (__a, __b);
16258 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
16259 vdups_lane_s32 (int32x2_t __a, const int __b)
16261 return __aarch64_vget_lane_s32 (__a, __b);
16264 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16265 vdups_lane_u32 (uint32x2_t __a, const int __b)
16267 return __aarch64_vget_lane_u32 (__a, __b);
16270 /* vdupd_lane */
16271 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16272 vdupd_lane_f64 (float64x1_t __a, const int __b)
16274 __builtin_aarch64_im_lane_boundsi (__b, 1);
16275 return __a[0];
16278 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16279 vdupd_lane_s64 (int64x1_t __a, const int __b)
16281 __builtin_aarch64_im_lane_boundsi (__b, 1);
16282 return __a[0];
16285 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16286 vdupd_lane_u64 (uint64x1_t __a, const int __b)
16288 __builtin_aarch64_im_lane_boundsi (__b, 1);
16289 return __a[0];
16292 /* vdupb_laneq */
16293 __extension__ static __inline poly8_t __attribute__ ((__always_inline__))
16294 vdupb_laneq_p8 (poly8x16_t __a, const int __b)
16296 return __aarch64_vgetq_lane_p8 (__a, __b);
16299 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
16300 vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b)
16302 return __aarch64_vgetq_lane_s8 (__a, __b);
16305 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
16306 vdupb_laneq_u8 (uint8x16_t __a, const int __b)
16308 return __aarch64_vgetq_lane_u8 (__a, __b);
16311 /* vduph_laneq */
16312 __extension__ static __inline poly16_t __attribute__ ((__always_inline__))
16313 vduph_laneq_p16 (poly16x8_t __a, const int __b)
16315 return __aarch64_vgetq_lane_p16 (__a, __b);
16318 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
16319 vduph_laneq_s16 (int16x8_t __a, const int __b)
16321 return __aarch64_vgetq_lane_s16 (__a, __b);
16324 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
16325 vduph_laneq_u16 (uint16x8_t __a, const int __b)
16327 return __aarch64_vgetq_lane_u16 (__a, __b);
16330 /* vdups_laneq */
16331 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16332 vdups_laneq_f32 (float32x4_t __a, const int __b)
16334 return __aarch64_vgetq_lane_f32 (__a, __b);
16337 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
16338 vdups_laneq_s32 (int32x4_t __a, const int __b)
16340 return __aarch64_vgetq_lane_s32 (__a, __b);
16343 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
16344 vdups_laneq_u32 (uint32x4_t __a, const int __b)
16346 return __aarch64_vgetq_lane_u32 (__a, __b);
16349 /* vdupd_laneq */
16350 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16351 vdupd_laneq_f64 (float64x2_t __a, const int __b)
16353 return __aarch64_vgetq_lane_f64 (__a, __b);
16356 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
16357 vdupd_laneq_s64 (int64x2_t __a, const int __b)
16359 return __aarch64_vgetq_lane_s64 (__a, __b);
16362 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
16363 vdupd_laneq_u64 (uint64x2_t __a, const int __b)
16365 return __aarch64_vgetq_lane_u64 (__a, __b);
16368 /* vext */
16370 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16371 vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
16373 __builtin_aarch64_im_lane_boundsi (__c, 2);
16374 #ifdef __AARCH64EB__
16375 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16376 #else
16377 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16378 #endif
16381 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16382 vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
16384 /* The only possible index to the assembler instruction returns element 0. */
16385 __builtin_aarch64_im_lane_boundsi (__c, 1);
16386 return __a;
16388 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16389 vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
16391 __builtin_aarch64_im_lane_boundsi (__c, 8);
16392 #ifdef __AARCH64EB__
16393 return __builtin_shuffle (__b, __a, (uint8x8_t)
16394 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16395 #else
16396 return __builtin_shuffle (__a, __b,
16397 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16398 #endif
16401 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16402 vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
16404 __builtin_aarch64_im_lane_boundsi (__c, 4);
16405 #ifdef __AARCH64EB__
16406 return __builtin_shuffle (__b, __a,
16407 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16408 #else
16409 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16410 #endif
16413 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16414 vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
16416 __builtin_aarch64_im_lane_boundsi (__c, 8);
16417 #ifdef __AARCH64EB__
16418 return __builtin_shuffle (__b, __a, (uint8x8_t)
16419 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16420 #else
16421 return __builtin_shuffle (__a, __b,
16422 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16423 #endif
16426 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16427 vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
16429 __builtin_aarch64_im_lane_boundsi (__c, 4);
16430 #ifdef __AARCH64EB__
16431 return __builtin_shuffle (__b, __a,
16432 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16433 #else
16434 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16435 #endif
16438 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16439 vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
16441 __builtin_aarch64_im_lane_boundsi (__c, 2);
16442 #ifdef __AARCH64EB__
16443 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16444 #else
16445 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16446 #endif
16449 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16450 vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
16452 /* The only possible index to the assembler instruction returns element 0. */
16453 __builtin_aarch64_im_lane_boundsi (__c, 1);
16454 return __a;
16457 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16458 vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
16460 __builtin_aarch64_im_lane_boundsi (__c, 8);
16461 #ifdef __AARCH64EB__
16462 return __builtin_shuffle (__b, __a, (uint8x8_t)
16463 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16464 #else
16465 return __builtin_shuffle (__a, __b,
16466 (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16467 #endif
16470 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16471 vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
16473 __builtin_aarch64_im_lane_boundsi (__c, 4);
16474 #ifdef __AARCH64EB__
16475 return __builtin_shuffle (__b, __a,
16476 (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16477 #else
16478 return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
16479 #endif
16482 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16483 vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
16485 __builtin_aarch64_im_lane_boundsi (__c, 2);
16486 #ifdef __AARCH64EB__
16487 return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
16488 #else
16489 return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
16490 #endif
16493 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16494 vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
16496 /* The only possible index to the assembler instruction returns element 0. */
16497 __builtin_aarch64_im_lane_boundsi (__c, 1);
16498 return __a;
16501 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16502 vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
16504 __builtin_aarch64_im_lane_boundsi (__c, 4);
16505 #ifdef __AARCH64EB__
16506 return __builtin_shuffle (__b, __a,
16507 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16508 #else
16509 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16510 #endif
16513 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16514 vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
16516 __builtin_aarch64_im_lane_boundsi (__c, 2);
16517 #ifdef __AARCH64EB__
16518 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16519 #else
16520 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16521 #endif
16524 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16525 vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
16527 __builtin_aarch64_im_lane_boundsi (__c, 16);
16528 #ifdef __AARCH64EB__
16529 return __builtin_shuffle (__b, __a, (uint8x16_t)
16530 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16531 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16532 #else
16533 return __builtin_shuffle (__a, __b, (uint8x16_t)
16534 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16535 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16536 #endif
16539 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16540 vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
16542 __builtin_aarch64_im_lane_boundsi (__c, 8);
16543 #ifdef __AARCH64EB__
16544 return __builtin_shuffle (__b, __a, (uint16x8_t)
16545 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16546 #else
16547 return __builtin_shuffle (__a, __b,
16548 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16549 #endif
16552 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16553 vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
16555 __builtin_aarch64_im_lane_boundsi (__c, 16);
16556 #ifdef __AARCH64EB__
16557 return __builtin_shuffle (__b, __a, (uint8x16_t)
16558 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16559 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16560 #else
16561 return __builtin_shuffle (__a, __b, (uint8x16_t)
16562 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16563 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16564 #endif
16567 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16568 vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
16570 __builtin_aarch64_im_lane_boundsi (__c, 8);
16571 #ifdef __AARCH64EB__
16572 return __builtin_shuffle (__b, __a, (uint16x8_t)
16573 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16574 #else
16575 return __builtin_shuffle (__a, __b,
16576 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16577 #endif
16580 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16581 vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
16583 __builtin_aarch64_im_lane_boundsi (__c, 4);
16584 #ifdef __AARCH64EB__
16585 return __builtin_shuffle (__b, __a,
16586 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16587 #else
16588 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16589 #endif
16592 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
16593 vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
16595 __builtin_aarch64_im_lane_boundsi (__c, 2);
16596 #ifdef __AARCH64EB__
16597 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16598 #else
16599 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16600 #endif
16603 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
16604 vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
16606 __builtin_aarch64_im_lane_boundsi (__c, 16);
16607 #ifdef __AARCH64EB__
16608 return __builtin_shuffle (__b, __a, (uint8x16_t)
16609 {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
16610 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
16611 #else
16612 return __builtin_shuffle (__a, __b, (uint8x16_t)
16613 {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
16614 __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
16615 #endif
16618 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
16619 vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
16621 __builtin_aarch64_im_lane_boundsi (__c, 8);
16622 #ifdef __AARCH64EB__
16623 return __builtin_shuffle (__b, __a, (uint16x8_t)
16624 {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
16625 #else
16626 return __builtin_shuffle (__a, __b,
16627 (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
16628 #endif
16631 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
16632 vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
16634 __builtin_aarch64_im_lane_boundsi (__c, 4);
16635 #ifdef __AARCH64EB__
16636 return __builtin_shuffle (__b, __a,
16637 (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
16638 #else
16639 return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
16640 #endif
16643 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
16644 vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
16646 __builtin_aarch64_im_lane_boundsi (__c, 2);
16647 #ifdef __AARCH64EB__
16648 return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
16649 #else
16650 return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
16651 #endif
16654 /* vfma */
16656 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16657 vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16659 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16662 /* vfma_lane */
16664 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16665 vfma_lane_f32 (float32x2_t __a, float32x2_t __b,
16666 float32x2_t __c, const int __lane)
16668 return __builtin_aarch64_fmav2sf (__b,
16669 __aarch64_vdup_lane_f32 (__c, __lane),
16670 __a);
16673 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16674 vfma_lane_f64 (float64x1_t __a, float64x1_t __b,
16675 float64x1_t __c, const int __lane)
16677 return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])};
16680 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16681 vfmad_lane_f64 (float64_t __a, float64_t __b,
16682 float64x1_t __c, const int __lane)
16684 return __builtin_fma (__b, __c[0], __a);
16687 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16688 vfmas_lane_f32 (float32_t __a, float32_t __b,
16689 float32x2_t __c, const int __lane)
16691 return __builtin_fmaf (__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16694 /* vfma_laneq */
16696 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16697 vfma_laneq_f32 (float32x2_t __a, float32x2_t __b,
16698 float32x4_t __c, const int __lane)
16700 return __builtin_aarch64_fmav2sf (__b,
16701 __aarch64_vdup_laneq_f32 (__c, __lane),
16702 __a);
16705 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16706 vfma_laneq_f64 (float64x1_t __a, float64x1_t __b,
16707 float64x2_t __c, const int __lane)
16709 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16710 return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])};
16713 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16714 vfmad_laneq_f64 (float64_t __a, float64_t __b,
16715 float64x2_t __c, const int __lane)
16717 return __builtin_fma (__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16720 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16721 vfmas_laneq_f32 (float32_t __a, float32_t __b,
16722 float32x4_t __c, const int __lane)
16724 return __builtin_fmaf (__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16727 /* vfmaq_lane */
16729 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16730 vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b,
16731 float32x2_t __c, const int __lane)
16733 return __builtin_aarch64_fmav4sf (__b,
16734 __aarch64_vdupq_lane_f32 (__c, __lane),
16735 __a);
16738 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16739 vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b,
16740 float64x1_t __c, const int __lane)
16742 return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a);
16745 /* vfmaq_laneq */
16747 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16748 vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16749 float32x4_t __c, const int __lane)
16751 return __builtin_aarch64_fmav4sf (__b,
16752 __aarch64_vdupq_laneq_f32 (__c, __lane),
16753 __a);
16756 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16757 vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16758 float64x2_t __c, const int __lane)
16760 return __builtin_aarch64_fmav2df (__b,
16761 __aarch64_vdupq_laneq_f64 (__c, __lane),
16762 __a);
16765 /* vfms */
16767 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16768 vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
16770 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16773 /* vfms_lane */
16775 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16776 vfms_lane_f32 (float32x2_t __a, float32x2_t __b,
16777 float32x2_t __c, const int __lane)
16779 return __builtin_aarch64_fmav2sf (-__b,
16780 __aarch64_vdup_lane_f32 (__c, __lane),
16781 __a);
16784 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16785 vfms_lane_f64 (float64x1_t __a, float64x1_t __b,
16786 float64x1_t __c, const int __lane)
16788 return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])};
16791 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16792 vfmsd_lane_f64 (float64_t __a, float64_t __b,
16793 float64x1_t __c, const int __lane)
16795 return __builtin_fma (-__b, __c[0], __a);
16798 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16799 vfmss_lane_f32 (float32_t __a, float32_t __b,
16800 float32x2_t __c, const int __lane)
16802 return __builtin_fmaf (-__b, __aarch64_vget_lane_f32 (__c, __lane), __a);
16805 /* vfms_laneq */
16807 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16808 vfms_laneq_f32 (float32x2_t __a, float32x2_t __b,
16809 float32x4_t __c, const int __lane)
16811 return __builtin_aarch64_fmav2sf (-__b,
16812 __aarch64_vdup_laneq_f32 (__c, __lane),
16813 __a);
16816 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16817 vfms_laneq_f64 (float64x1_t __a, float64x1_t __b,
16818 float64x2_t __c, const int __lane)
16820 float64_t __c0 = __aarch64_vgetq_lane_f64 (__c, __lane);
16821 return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])};
16824 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
16825 vfmsd_laneq_f64 (float64_t __a, float64_t __b,
16826 float64x2_t __c, const int __lane)
16828 return __builtin_fma (-__b, __aarch64_vgetq_lane_f64 (__c, __lane), __a);
16831 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
16832 vfmss_laneq_f32 (float32_t __a, float32_t __b,
16833 float32x4_t __c, const int __lane)
16835 return __builtin_fmaf (-__b, __aarch64_vgetq_lane_f32 (__c, __lane), __a);
16838 /* vfmsq_lane */
16840 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16841 vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b,
16842 float32x2_t __c, const int __lane)
16844 return __builtin_aarch64_fmav4sf (-__b,
16845 __aarch64_vdupq_lane_f32 (__c, __lane),
16846 __a);
16849 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16850 vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b,
16851 float64x1_t __c, const int __lane)
16853 return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a);
16856 /* vfmsq_laneq */
16858 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16859 vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
16860 float32x4_t __c, const int __lane)
16862 return __builtin_aarch64_fmav4sf (-__b,
16863 __aarch64_vdupq_laneq_f32 (__c, __lane),
16864 __a);
16867 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16868 vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b,
16869 float64x2_t __c, const int __lane)
16871 return __builtin_aarch64_fmav2df (-__b,
16872 __aarch64_vdupq_laneq_f64 (__c, __lane),
16873 __a);
16876 /* vld1 */
16878 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
16879 vld1_f32 (const float32_t *a)
16881 return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a);
16884 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
16885 vld1_f64 (const float64_t *a)
16887 return (float64x1_t) {*a};
16890 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
16891 vld1_p8 (const poly8_t *a)
16893 return (poly8x8_t)
16894 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16897 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
16898 vld1_p16 (const poly16_t *a)
16900 return (poly16x4_t)
16901 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16904 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
16905 vld1_s8 (const int8_t *a)
16907 return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16910 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
16911 vld1_s16 (const int16_t *a)
16913 return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16916 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
16917 vld1_s32 (const int32_t *a)
16919 return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
16922 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
16923 vld1_s64 (const int64_t *a)
16925 return (int64x1_t) {*a};
16928 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
16929 vld1_u8 (const uint8_t *a)
16931 return (uint8x8_t)
16932 __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a);
16935 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
16936 vld1_u16 (const uint16_t *a)
16938 return (uint16x4_t)
16939 __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a);
16942 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
16943 vld1_u32 (const uint32_t *a)
16945 return (uint32x2_t)
16946 __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a);
16949 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
16950 vld1_u64 (const uint64_t *a)
16952 return (uint64x1_t) {*a};
16955 /* vld1q */
16957 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
16958 vld1q_f32 (const float32_t *a)
16960 return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a);
16963 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
16964 vld1q_f64 (const float64_t *a)
16966 return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a);
16969 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
16970 vld1q_p8 (const poly8_t *a)
16972 return (poly8x16_t)
16973 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
16976 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
16977 vld1q_p16 (const poly16_t *a)
16979 return (poly16x8_t)
16980 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
16983 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
16984 vld1q_s8 (const int8_t *a)
16986 return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
16989 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
16990 vld1q_s16 (const int16_t *a)
16992 return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
16995 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
16996 vld1q_s32 (const int32_t *a)
16998 return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
17001 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
17002 vld1q_s64 (const int64_t *a)
17004 return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
17007 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17008 vld1q_u8 (const uint8_t *a)
17010 return (uint8x16_t)
17011 __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a);
17014 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17015 vld1q_u16 (const uint16_t *a)
17017 return (uint16x8_t)
17018 __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a);
17021 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17022 vld1q_u32 (const uint32_t *a)
17024 return (uint32x4_t)
17025 __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a);
17028 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
17029 vld1q_u64 (const uint64_t *a)
17031 return (uint64x2_t)
17032 __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a);
17035 /* vldn */
17037 __extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
17038 vld2_s64 (const int64_t * __a)
17040 int64x1x2_t ret;
17041 __builtin_aarch64_simd_oi __o;
17042 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
17043 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17044 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17045 return ret;
17048 __extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
17049 vld2_u64 (const uint64_t * __a)
17051 uint64x1x2_t ret;
17052 __builtin_aarch64_simd_oi __o;
17053 __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
17054 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
17055 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
17056 return ret;
17059 __extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
17060 vld2_f64 (const float64_t * __a)
17062 float64x1x2_t ret;
17063 __builtin_aarch64_simd_oi __o;
17064 __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
17065 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
17066 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
17067 return ret;
17070 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
17071 vld2_s8 (const int8_t * __a)
17073 int8x8x2_t ret;
17074 __builtin_aarch64_simd_oi __o;
17075 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17076 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17077 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17078 return ret;
17081 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
17082 vld2_p8 (const poly8_t * __a)
17084 poly8x8x2_t ret;
17085 __builtin_aarch64_simd_oi __o;
17086 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17087 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17088 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17089 return ret;
17092 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
17093 vld2_s16 (const int16_t * __a)
17095 int16x4x2_t ret;
17096 __builtin_aarch64_simd_oi __o;
17097 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17098 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17099 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17100 return ret;
17103 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
17104 vld2_p16 (const poly16_t * __a)
17106 poly16x4x2_t ret;
17107 __builtin_aarch64_simd_oi __o;
17108 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17109 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17110 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17111 return ret;
17114 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
17115 vld2_s32 (const int32_t * __a)
17117 int32x2x2_t ret;
17118 __builtin_aarch64_simd_oi __o;
17119 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
17120 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17121 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17122 return ret;
17125 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
17126 vld2_u8 (const uint8_t * __a)
17128 uint8x8x2_t ret;
17129 __builtin_aarch64_simd_oi __o;
17130 __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
17131 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
17132 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
17133 return ret;
17136 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
17137 vld2_u16 (const uint16_t * __a)
17139 uint16x4x2_t ret;
17140 __builtin_aarch64_simd_oi __o;
17141 __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
17142 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
17143 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
17144 return ret;
17147 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
17148 vld2_u32 (const uint32_t * __a)
17150 uint32x2x2_t ret;
17151 __builtin_aarch64_simd_oi __o;
17152 __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
17153 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
17154 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
17155 return ret;
17158 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
17159 vld2_f32 (const float32_t * __a)
17161 float32x2x2_t ret;
17162 __builtin_aarch64_simd_oi __o;
17163 __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
17164 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
17165 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
17166 return ret;
17169 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
17170 vld2q_s8 (const int8_t * __a)
17172 int8x16x2_t ret;
17173 __builtin_aarch64_simd_oi __o;
17174 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17175 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17176 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17177 return ret;
17180 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
17181 vld2q_p8 (const poly8_t * __a)
17183 poly8x16x2_t ret;
17184 __builtin_aarch64_simd_oi __o;
17185 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17186 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17187 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17188 return ret;
17191 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
17192 vld2q_s16 (const int16_t * __a)
17194 int16x8x2_t ret;
17195 __builtin_aarch64_simd_oi __o;
17196 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17197 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17198 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17199 return ret;
17202 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
17203 vld2q_p16 (const poly16_t * __a)
17205 poly16x8x2_t ret;
17206 __builtin_aarch64_simd_oi __o;
17207 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17208 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17209 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17210 return ret;
17213 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
17214 vld2q_s32 (const int32_t * __a)
17216 int32x4x2_t ret;
17217 __builtin_aarch64_simd_oi __o;
17218 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
17219 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17220 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17221 return ret;
17224 __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
17225 vld2q_s64 (const int64_t * __a)
17227 int64x2x2_t ret;
17228 __builtin_aarch64_simd_oi __o;
17229 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
17230 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17231 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17232 return ret;
17235 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
17236 vld2q_u8 (const uint8_t * __a)
17238 uint8x16x2_t ret;
17239 __builtin_aarch64_simd_oi __o;
17240 __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
17241 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
17242 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
17243 return ret;
17246 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
17247 vld2q_u16 (const uint16_t * __a)
17249 uint16x8x2_t ret;
17250 __builtin_aarch64_simd_oi __o;
17251 __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
17252 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
17253 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
17254 return ret;
17257 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
17258 vld2q_u32 (const uint32_t * __a)
17260 uint32x4x2_t ret;
17261 __builtin_aarch64_simd_oi __o;
17262 __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
17263 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
17264 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
17265 return ret;
17268 __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
17269 vld2q_u64 (const uint64_t * __a)
17271 uint64x2x2_t ret;
17272 __builtin_aarch64_simd_oi __o;
17273 __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
17274 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
17275 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
17276 return ret;
17279 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
17280 vld2q_f32 (const float32_t * __a)
17282 float32x4x2_t ret;
17283 __builtin_aarch64_simd_oi __o;
17284 __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
17285 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
17286 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
17287 return ret;
17290 __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
17291 vld2q_f64 (const float64_t * __a)
17293 float64x2x2_t ret;
17294 __builtin_aarch64_simd_oi __o;
17295 __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
17296 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
17297 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
17298 return ret;
17301 __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
17302 vld3_s64 (const int64_t * __a)
17304 int64x1x3_t ret;
17305 __builtin_aarch64_simd_ci __o;
17306 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
17307 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17308 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17309 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17310 return ret;
17313 __extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
17314 vld3_u64 (const uint64_t * __a)
17316 uint64x1x3_t ret;
17317 __builtin_aarch64_simd_ci __o;
17318 __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
17319 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
17320 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
17321 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
17322 return ret;
17325 __extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
17326 vld3_f64 (const float64_t * __a)
17328 float64x1x3_t ret;
17329 __builtin_aarch64_simd_ci __o;
17330 __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
17331 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
17332 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)};
17333 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
17334 return ret;
17337 __extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
17338 vld3_s8 (const int8_t * __a)
17340 int8x8x3_t ret;
17341 __builtin_aarch64_simd_ci __o;
17342 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17343 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17344 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17345 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17346 return ret;
17349 __extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
17350 vld3_p8 (const poly8_t * __a)
17352 poly8x8x3_t ret;
17353 __builtin_aarch64_simd_ci __o;
17354 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17355 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17356 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17357 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17358 return ret;
17361 __extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
17362 vld3_s16 (const int16_t * __a)
17364 int16x4x3_t ret;
17365 __builtin_aarch64_simd_ci __o;
17366 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17367 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17368 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17369 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17370 return ret;
17373 __extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
17374 vld3_p16 (const poly16_t * __a)
17376 poly16x4x3_t ret;
17377 __builtin_aarch64_simd_ci __o;
17378 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17379 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17380 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17381 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17382 return ret;
17385 __extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
17386 vld3_s32 (const int32_t * __a)
17388 int32x2x3_t ret;
17389 __builtin_aarch64_simd_ci __o;
17390 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
17391 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17392 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17393 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17394 return ret;
17397 __extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
17398 vld3_u8 (const uint8_t * __a)
17400 uint8x8x3_t ret;
17401 __builtin_aarch64_simd_ci __o;
17402 __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
17403 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
17404 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
17405 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
17406 return ret;
17409 __extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
17410 vld3_u16 (const uint16_t * __a)
17412 uint16x4x3_t ret;
17413 __builtin_aarch64_simd_ci __o;
17414 __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
17415 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
17416 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
17417 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
17418 return ret;
17421 __extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
17422 vld3_u32 (const uint32_t * __a)
17424 uint32x2x3_t ret;
17425 __builtin_aarch64_simd_ci __o;
17426 __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
17427 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
17428 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
17429 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
17430 return ret;
17433 __extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
17434 vld3_f32 (const float32_t * __a)
17436 float32x2x3_t ret;
17437 __builtin_aarch64_simd_ci __o;
17438 __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
17439 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
17440 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
17441 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
17442 return ret;
17445 __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
17446 vld3q_s8 (const int8_t * __a)
17448 int8x16x3_t ret;
17449 __builtin_aarch64_simd_ci __o;
17450 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17451 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17452 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17453 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17454 return ret;
17457 __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
17458 vld3q_p8 (const poly8_t * __a)
17460 poly8x16x3_t ret;
17461 __builtin_aarch64_simd_ci __o;
17462 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17463 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17464 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17465 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17466 return ret;
17469 __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
17470 vld3q_s16 (const int16_t * __a)
17472 int16x8x3_t ret;
17473 __builtin_aarch64_simd_ci __o;
17474 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17475 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17476 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17477 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17478 return ret;
17481 __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
17482 vld3q_p16 (const poly16_t * __a)
17484 poly16x8x3_t ret;
17485 __builtin_aarch64_simd_ci __o;
17486 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17487 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17488 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17489 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17490 return ret;
17493 __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
17494 vld3q_s32 (const int32_t * __a)
17496 int32x4x3_t ret;
17497 __builtin_aarch64_simd_ci __o;
17498 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
17499 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17500 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17501 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17502 return ret;
17505 __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__))
17506 vld3q_s64 (const int64_t * __a)
17508 int64x2x3_t ret;
17509 __builtin_aarch64_simd_ci __o;
17510 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
17511 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17512 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17513 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17514 return ret;
17517 __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
17518 vld3q_u8 (const uint8_t * __a)
17520 uint8x16x3_t ret;
17521 __builtin_aarch64_simd_ci __o;
17522 __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
17523 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
17524 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
17525 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
17526 return ret;
17529 __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
17530 vld3q_u16 (const uint16_t * __a)
17532 uint16x8x3_t ret;
17533 __builtin_aarch64_simd_ci __o;
17534 __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
17535 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
17536 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
17537 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
17538 return ret;
17541 __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
17542 vld3q_u32 (const uint32_t * __a)
17544 uint32x4x3_t ret;
17545 __builtin_aarch64_simd_ci __o;
17546 __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
17547 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
17548 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
17549 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
17550 return ret;
17553 __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
17554 vld3q_u64 (const uint64_t * __a)
17556 uint64x2x3_t ret;
17557 __builtin_aarch64_simd_ci __o;
17558 __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
17559 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
17560 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
17561 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
17562 return ret;
17565 __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
17566 vld3q_f32 (const float32_t * __a)
17568 float32x4x3_t ret;
17569 __builtin_aarch64_simd_ci __o;
17570 __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
17571 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
17572 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
17573 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
17574 return ret;
17577 __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
17578 vld3q_f64 (const float64_t * __a)
17580 float64x2x3_t ret;
17581 __builtin_aarch64_simd_ci __o;
17582 __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
17583 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
17584 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
17585 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
17586 return ret;
17589 __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
17590 vld4_s64 (const int64_t * __a)
17592 int64x1x4_t ret;
17593 __builtin_aarch64_simd_xi __o;
17594 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17595 ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17596 ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17597 ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17598 ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17599 return ret;
17602 __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
17603 vld4_u64 (const uint64_t * __a)
17605 uint64x1x4_t ret;
17606 __builtin_aarch64_simd_xi __o;
17607 __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
17608 ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
17609 ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
17610 ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
17611 ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
17612 return ret;
17615 __extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__))
17616 vld4_f64 (const float64_t * __a)
17618 float64x1x4_t ret;
17619 __builtin_aarch64_simd_xi __o;
17620 __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
17621 ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
17622 ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
17623 ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
17624 ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
17625 return ret;
17628 __extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
17629 vld4_s8 (const int8_t * __a)
17631 int8x8x4_t ret;
17632 __builtin_aarch64_simd_xi __o;
17633 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17634 ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17635 ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17636 ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17637 ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17638 return ret;
17641 __extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
17642 vld4_p8 (const poly8_t * __a)
17644 poly8x8x4_t ret;
17645 __builtin_aarch64_simd_xi __o;
17646 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17647 ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17648 ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17649 ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17650 ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17651 return ret;
17654 __extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
17655 vld4_s16 (const int16_t * __a)
17657 int16x4x4_t ret;
17658 __builtin_aarch64_simd_xi __o;
17659 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17660 ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17661 ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17662 ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17663 ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17664 return ret;
17667 __extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
17668 vld4_p16 (const poly16_t * __a)
17670 poly16x4x4_t ret;
17671 __builtin_aarch64_simd_xi __o;
17672 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17673 ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17674 ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17675 ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17676 ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17677 return ret;
17680 __extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
17681 vld4_s32 (const int32_t * __a)
17683 int32x2x4_t ret;
17684 __builtin_aarch64_simd_xi __o;
17685 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17686 ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17687 ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17688 ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17689 ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17690 return ret;
17693 __extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
17694 vld4_u8 (const uint8_t * __a)
17696 uint8x8x4_t ret;
17697 __builtin_aarch64_simd_xi __o;
17698 __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
17699 ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
17700 ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
17701 ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
17702 ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
17703 return ret;
17706 __extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
17707 vld4_u16 (const uint16_t * __a)
17709 uint16x4x4_t ret;
17710 __builtin_aarch64_simd_xi __o;
17711 __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
17712 ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
17713 ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
17714 ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
17715 ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
17716 return ret;
17719 __extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
17720 vld4_u32 (const uint32_t * __a)
17722 uint32x2x4_t ret;
17723 __builtin_aarch64_simd_xi __o;
17724 __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
17725 ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
17726 ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
17727 ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
17728 ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
17729 return ret;
17732 __extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
17733 vld4_f32 (const float32_t * __a)
17735 float32x2x4_t ret;
17736 __builtin_aarch64_simd_xi __o;
17737 __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
17738 ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
17739 ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
17740 ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
17741 ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
17742 return ret;
17745 __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
17746 vld4q_s8 (const int8_t * __a)
17748 int8x16x4_t ret;
17749 __builtin_aarch64_simd_xi __o;
17750 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17751 ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17752 ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17753 ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17754 ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17755 return ret;
17758 __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
17759 vld4q_p8 (const poly8_t * __a)
17761 poly8x16x4_t ret;
17762 __builtin_aarch64_simd_xi __o;
17763 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17764 ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17765 ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17766 ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17767 ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17768 return ret;
17771 __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
17772 vld4q_s16 (const int16_t * __a)
17774 int16x8x4_t ret;
17775 __builtin_aarch64_simd_xi __o;
17776 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17777 ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17778 ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17779 ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17780 ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17781 return ret;
17784 __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__))
17785 vld4q_p16 (const poly16_t * __a)
17787 poly16x8x4_t ret;
17788 __builtin_aarch64_simd_xi __o;
17789 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17790 ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17791 ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17792 ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17793 ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17794 return ret;
17797 __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
17798 vld4q_s32 (const int32_t * __a)
17800 int32x4x4_t ret;
17801 __builtin_aarch64_simd_xi __o;
17802 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17803 ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17804 ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17805 ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17806 ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17807 return ret;
17810 __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
17811 vld4q_s64 (const int64_t * __a)
17813 int64x2x4_t ret;
17814 __builtin_aarch64_simd_xi __o;
17815 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17816 ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17817 ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17818 ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17819 ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17820 return ret;
17823 __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
17824 vld4q_u8 (const uint8_t * __a)
17826 uint8x16x4_t ret;
17827 __builtin_aarch64_simd_xi __o;
17828 __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
17829 ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
17830 ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
17831 ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
17832 ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
17833 return ret;
17836 __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
17837 vld4q_u16 (const uint16_t * __a)
17839 uint16x8x4_t ret;
17840 __builtin_aarch64_simd_xi __o;
17841 __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
17842 ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
17843 ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
17844 ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
17845 ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
17846 return ret;
17849 __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
17850 vld4q_u32 (const uint32_t * __a)
17852 uint32x4x4_t ret;
17853 __builtin_aarch64_simd_xi __o;
17854 __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
17855 ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
17856 ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
17857 ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
17858 ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
17859 return ret;
17862 __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
17863 vld4q_u64 (const uint64_t * __a)
17865 uint64x2x4_t ret;
17866 __builtin_aarch64_simd_xi __o;
17867 __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
17868 ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
17869 ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
17870 ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
17871 ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
17872 return ret;
17875 __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
17876 vld4q_f32 (const float32_t * __a)
17878 float32x4x4_t ret;
17879 __builtin_aarch64_simd_xi __o;
17880 __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
17881 ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
17882 ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
17883 ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
17884 ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
17885 return ret;
17888 __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
17889 vld4q_f64 (const float64_t * __a)
17891 float64x2x4_t ret;
17892 __builtin_aarch64_simd_xi __o;
17893 __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a);
17894 ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
17895 ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
17896 ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
17897 ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
17898 return ret;
17901 /* vmax */
17903 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
17904 vmax_f32 (float32x2_t __a, float32x2_t __b)
17906 return __builtin_aarch64_smax_nanv2sf (__a, __b);
17909 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
17910 vmax_s8 (int8x8_t __a, int8x8_t __b)
17912 return __builtin_aarch64_smaxv8qi (__a, __b);
17915 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
17916 vmax_s16 (int16x4_t __a, int16x4_t __b)
17918 return __builtin_aarch64_smaxv4hi (__a, __b);
17921 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
17922 vmax_s32 (int32x2_t __a, int32x2_t __b)
17924 return __builtin_aarch64_smaxv2si (__a, __b);
17927 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
17928 vmax_u8 (uint8x8_t __a, uint8x8_t __b)
17930 return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a,
17931 (int8x8_t) __b);
17934 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
17935 vmax_u16 (uint16x4_t __a, uint16x4_t __b)
17937 return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a,
17938 (int16x4_t) __b);
17941 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
17942 vmax_u32 (uint32x2_t __a, uint32x2_t __b)
17944 return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a,
17945 (int32x2_t) __b);
17948 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
17949 vmaxq_f32 (float32x4_t __a, float32x4_t __b)
17951 return __builtin_aarch64_smax_nanv4sf (__a, __b);
17954 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
17955 vmaxq_f64 (float64x2_t __a, float64x2_t __b)
17957 return __builtin_aarch64_smax_nanv2df (__a, __b);
17960 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
17961 vmaxq_s8 (int8x16_t __a, int8x16_t __b)
17963 return __builtin_aarch64_smaxv16qi (__a, __b);
17966 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
17967 vmaxq_s16 (int16x8_t __a, int16x8_t __b)
17969 return __builtin_aarch64_smaxv8hi (__a, __b);
17972 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
17973 vmaxq_s32 (int32x4_t __a, int32x4_t __b)
17975 return __builtin_aarch64_smaxv4si (__a, __b);
17978 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
17979 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
17981 return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a,
17982 (int8x16_t) __b);
17985 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
17986 vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
17988 return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a,
17989 (int16x8_t) __b);
17992 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
17993 vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
17995 return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a,
17996 (int32x4_t) __b);
17999 /* vmaxnm */
18001 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18002 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
18004 return __builtin_aarch64_smaxv2sf (__a, __b);
18007 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18008 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
18010 return __builtin_aarch64_smaxv4sf (__a, __b);
18013 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18014 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
18016 return __builtin_aarch64_smaxv2df (__a, __b);
18019 /* vmaxv */
18021 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18022 vmaxv_f32 (float32x2_t __a)
18024 return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
18028 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18029 vmaxv_s8 (int8x8_t __a)
18031 return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
18034 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18035 vmaxv_s16 (int16x4_t __a)
18037 return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
18040 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18041 vmaxv_s32 (int32x2_t __a)
18043 return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
18046 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18047 vmaxv_u8 (uint8x8_t __a)
18049 return vget_lane_u8 ((uint8x8_t)
18050 __builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
18054 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18055 vmaxv_u16 (uint16x4_t __a)
18057 return vget_lane_u16 ((uint16x4_t)
18058 __builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
18062 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18063 vmaxv_u32 (uint32x2_t __a)
18065 return vget_lane_u32 ((uint32x2_t)
18066 __builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
18070 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18071 vmaxvq_f32 (float32x4_t __a)
18073 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
18077 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18078 vmaxvq_f64 (float64x2_t __a)
18080 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
18084 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18085 vmaxvq_s8 (int8x16_t __a)
18087 return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
18090 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18091 vmaxvq_s16 (int16x8_t __a)
18093 return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
18096 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18097 vmaxvq_s32 (int32x4_t __a)
18099 return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
18102 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18103 vmaxvq_u8 (uint8x16_t __a)
18105 return vgetq_lane_u8 ((uint8x16_t)
18106 __builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
18110 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18111 vmaxvq_u16 (uint16x8_t __a)
18113 return vgetq_lane_u16 ((uint16x8_t)
18114 __builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
18118 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18119 vmaxvq_u32 (uint32x4_t __a)
18121 return vgetq_lane_u32 ((uint32x4_t)
18122 __builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
18126 /* vmaxnmv */
18128 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18129 vmaxnmv_f32 (float32x2_t __a)
18131 return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
18135 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18136 vmaxnmvq_f32 (float32x4_t __a)
18138 return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
18141 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18142 vmaxnmvq_f64 (float64x2_t __a)
18144 return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
18147 /* vmin */
18149 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18150 vmin_f32 (float32x2_t __a, float32x2_t __b)
18152 return __builtin_aarch64_smin_nanv2sf (__a, __b);
18155 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18156 vmin_s8 (int8x8_t __a, int8x8_t __b)
18158 return __builtin_aarch64_sminv8qi (__a, __b);
18161 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18162 vmin_s16 (int16x4_t __a, int16x4_t __b)
18164 return __builtin_aarch64_sminv4hi (__a, __b);
18167 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18168 vmin_s32 (int32x2_t __a, int32x2_t __b)
18170 return __builtin_aarch64_sminv2si (__a, __b);
18173 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18174 vmin_u8 (uint8x8_t __a, uint8x8_t __b)
18176 return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a,
18177 (int8x8_t) __b);
18180 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18181 vmin_u16 (uint16x4_t __a, uint16x4_t __b)
18183 return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a,
18184 (int16x4_t) __b);
18187 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18188 vmin_u32 (uint32x2_t __a, uint32x2_t __b)
18190 return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a,
18191 (int32x2_t) __b);
18194 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18195 vminq_f32 (float32x4_t __a, float32x4_t __b)
18197 return __builtin_aarch64_smin_nanv4sf (__a, __b);
18200 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18201 vminq_f64 (float64x2_t __a, float64x2_t __b)
18203 return __builtin_aarch64_smin_nanv2df (__a, __b);
18206 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18207 vminq_s8 (int8x16_t __a, int8x16_t __b)
18209 return __builtin_aarch64_sminv16qi (__a, __b);
18212 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18213 vminq_s16 (int16x8_t __a, int16x8_t __b)
18215 return __builtin_aarch64_sminv8hi (__a, __b);
18218 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18219 vminq_s32 (int32x4_t __a, int32x4_t __b)
18221 return __builtin_aarch64_sminv4si (__a, __b);
18224 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18225 vminq_u8 (uint8x16_t __a, uint8x16_t __b)
18227 return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a,
18228 (int8x16_t) __b);
18231 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18232 vminq_u16 (uint16x8_t __a, uint16x8_t __b)
18234 return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a,
18235 (int16x8_t) __b);
18238 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18239 vminq_u32 (uint32x4_t __a, uint32x4_t __b)
18241 return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a,
18242 (int32x4_t) __b);
18245 /* vminnm */
18247 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18248 vminnm_f32 (float32x2_t __a, float32x2_t __b)
18250 return __builtin_aarch64_sminv2sf (__a, __b);
18253 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18254 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
18256 return __builtin_aarch64_sminv4sf (__a, __b);
18259 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18260 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
18262 return __builtin_aarch64_sminv2df (__a, __b);
18265 /* vminv */
18267 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18268 vminv_f32 (float32x2_t __a)
18270 return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
18274 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18275 vminv_s8 (int8x8_t __a)
18277 return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
18281 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18282 vminv_s16 (int16x4_t __a)
18284 return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
18287 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18288 vminv_s32 (int32x2_t __a)
18290 return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
18293 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18294 vminv_u8 (uint8x8_t __a)
18296 return vget_lane_u8 ((uint8x8_t)
18297 __builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
18301 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18302 vminv_u16 (uint16x4_t __a)
18304 return vget_lane_u16 ((uint16x4_t)
18305 __builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
18309 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18310 vminv_u32 (uint32x2_t __a)
18312 return vget_lane_u32 ((uint32x2_t)
18313 __builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
18317 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18318 vminvq_f32 (float32x4_t __a)
18320 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
18324 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18325 vminvq_f64 (float64x2_t __a)
18327 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
18331 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
18332 vminvq_s8 (int8x16_t __a)
18334 return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
18337 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
18338 vminvq_s16 (int16x8_t __a)
18340 return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
18343 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
18344 vminvq_s32 (int32x4_t __a)
18346 return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
18349 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
18350 vminvq_u8 (uint8x16_t __a)
18352 return vgetq_lane_u8 ((uint8x16_t)
18353 __builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
18357 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
18358 vminvq_u16 (uint16x8_t __a)
18360 return vgetq_lane_u16 ((uint16x8_t)
18361 __builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
18365 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
18366 vminvq_u32 (uint32x4_t __a)
18368 return vgetq_lane_u32 ((uint32x4_t)
18369 __builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
18373 /* vminnmv */
18375 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18376 vminnmv_f32 (float32x2_t __a)
18378 return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
18381 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18382 vminnmvq_f32 (float32x4_t __a)
18384 return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
18387 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18388 vminnmvq_f64 (float64x2_t __a)
18390 return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
18393 /* vmla */
18395 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18396 vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18398 return a + b * c;
18401 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18402 vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18404 return __a + __b * __c;
18407 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18408 vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18410 return a + b * c;
18413 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18414 vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18416 return a + b * c;
18419 /* vmla_lane */
18421 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18422 vmla_lane_f32 (float32x2_t __a, float32x2_t __b,
18423 float32x2_t __c, const int __lane)
18425 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18428 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18429 vmla_lane_s16 (int16x4_t __a, int16x4_t __b,
18430 int16x4_t __c, const int __lane)
18432 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18435 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18436 vmla_lane_s32 (int32x2_t __a, int32x2_t __b,
18437 int32x2_t __c, const int __lane)
18439 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18442 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18443 vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18444 uint16x4_t __c, const int __lane)
18446 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18449 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18450 vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18451 uint32x2_t __c, const int __lane)
18453 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18456 /* vmla_laneq */
18458 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18459 vmla_laneq_f32 (float32x2_t __a, float32x2_t __b,
18460 float32x4_t __c, const int __lane)
18462 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18465 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18466 vmla_laneq_s16 (int16x4_t __a, int16x4_t __b,
18467 int16x8_t __c, const int __lane)
18469 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18472 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18473 vmla_laneq_s32 (int32x2_t __a, int32x2_t __b,
18474 int32x4_t __c, const int __lane)
18476 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18479 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18480 vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18481 uint16x8_t __c, const int __lane)
18483 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18486 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18487 vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18488 uint32x4_t __c, const int __lane)
18490 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18493 /* vmlaq_lane */
18495 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18496 vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b,
18497 float32x2_t __c, const int __lane)
18499 return (__a + (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18502 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18503 vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b,
18504 int16x4_t __c, const int __lane)
18506 return (__a + (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18509 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18510 vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b,
18511 int32x2_t __c, const int __lane)
18513 return (__a + (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18516 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18517 vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18518 uint16x4_t __c, const int __lane)
18520 return (__a + (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18523 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18524 vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18525 uint32x2_t __c, const int __lane)
18527 return (__a + (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18530 /* vmlaq_laneq */
18532 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18533 vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18534 float32x4_t __c, const int __lane)
18536 return (__a + (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18539 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18540 vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18541 int16x8_t __c, const int __lane)
18543 return (__a + (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18546 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18547 vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18548 int32x4_t __c, const int __lane)
18550 return (__a + (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18553 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18554 vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18555 uint16x8_t __c, const int __lane)
18557 return (__a + (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18560 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18561 vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18562 uint32x4_t __c, const int __lane)
18564 return (__a + (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18567 /* vmls */
18569 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18570 vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
18572 return a - b * c;
18575 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18576 vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c)
18578 return __a - __b * __c;
18581 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18582 vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c)
18584 return a - b * c;
18587 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18588 vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
18590 return a - b * c;
18593 /* vmls_lane */
18595 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18596 vmls_lane_f32 (float32x2_t __a, float32x2_t __b,
18597 float32x2_t __c, const int __lane)
18599 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18602 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18603 vmls_lane_s16 (int16x4_t __a, int16x4_t __b,
18604 int16x4_t __c, const int __lane)
18606 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18609 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18610 vmls_lane_s32 (int32x2_t __a, int32x2_t __b,
18611 int32x2_t __c, const int __lane)
18613 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18616 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18617 vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b,
18618 uint16x4_t __c, const int __lane)
18620 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18623 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18624 vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b,
18625 uint32x2_t __c, const int __lane)
18627 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18630 /* vmls_laneq */
18632 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18633 vmls_laneq_f32 (float32x2_t __a, float32x2_t __b,
18634 float32x4_t __c, const int __lane)
18636 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18639 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18640 vmls_laneq_s16 (int16x4_t __a, int16x4_t __b,
18641 int16x8_t __c, const int __lane)
18643 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18646 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18647 vmls_laneq_s32 (int32x2_t __a, int32x2_t __b,
18648 int32x4_t __c, const int __lane)
18650 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18653 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18654 vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b,
18655 uint16x8_t __c, const int __lane)
18657 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18660 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18661 vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b,
18662 uint32x4_t __c, const int __lane)
18664 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18667 /* vmlsq_lane */
18669 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18670 vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b,
18671 float32x2_t __c, const int __lane)
18673 return (__a - (__b * __aarch64_vget_lane_f32 (__c, __lane)));
18676 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18677 vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b,
18678 int16x4_t __c, const int __lane)
18680 return (__a - (__b * __aarch64_vget_lane_s16 (__c, __lane)));
18683 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18684 vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b,
18685 int32x2_t __c, const int __lane)
18687 return (__a - (__b * __aarch64_vget_lane_s32 (__c, __lane)));
18690 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18691 vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b,
18692 uint16x4_t __c, const int __lane)
18694 return (__a - (__b * __aarch64_vget_lane_u16 (__c, __lane)));
18697 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18698 vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b,
18699 uint32x2_t __c, const int __lane)
18701 return (__a - (__b * __aarch64_vget_lane_u32 (__c, __lane)));
18704 /* vmlsq_laneq */
18706 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18707 vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b,
18708 float32x4_t __c, const int __lane)
18710 return (__a - (__b * __aarch64_vgetq_lane_f32 (__c, __lane)));
18713 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18714 vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b,
18715 int16x8_t __c, const int __lane)
18717 return (__a - (__b * __aarch64_vgetq_lane_s16 (__c, __lane)));
18720 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18721 vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b,
18722 int32x4_t __c, const int __lane)
18724 return (__a - (__b * __aarch64_vgetq_lane_s32 (__c, __lane)));
18726 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18727 vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b,
18728 uint16x8_t __c, const int __lane)
18730 return (__a - (__b * __aarch64_vgetq_lane_u16 (__c, __lane)));
18733 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18734 vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b,
18735 uint32x4_t __c, const int __lane)
18737 return (__a - (__b * __aarch64_vgetq_lane_u32 (__c, __lane)));
18740 /* vmov_n_ */
18742 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18743 vmov_n_f32 (float32_t __a)
18745 return vdup_n_f32 (__a);
18748 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18749 vmov_n_f64 (float64_t __a)
18751 return (float64x1_t) {__a};
18754 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
18755 vmov_n_p8 (poly8_t __a)
18757 return vdup_n_p8 (__a);
18760 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
18761 vmov_n_p16 (poly16_t __a)
18763 return vdup_n_p16 (__a);
18766 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
18767 vmov_n_s8 (int8_t __a)
18769 return vdup_n_s8 (__a);
18772 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18773 vmov_n_s16 (int16_t __a)
18775 return vdup_n_s16 (__a);
18778 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18779 vmov_n_s32 (int32_t __a)
18781 return vdup_n_s32 (__a);
18784 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
18785 vmov_n_s64 (int64_t __a)
18787 return (int64x1_t) {__a};
18790 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
18791 vmov_n_u8 (uint8_t __a)
18793 return vdup_n_u8 (__a);
18796 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18797 vmov_n_u16 (uint16_t __a)
18799 return vdup_n_u16 (__a);
18802 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18803 vmov_n_u32 (uint32_t __a)
18805 return vdup_n_u32 (__a);
18808 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
18809 vmov_n_u64 (uint64_t __a)
18811 return (uint64x1_t) {__a};
18814 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
18815 vmovq_n_f32 (float32_t __a)
18817 return vdupq_n_f32 (__a);
18820 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
18821 vmovq_n_f64 (float64_t __a)
18823 return vdupq_n_f64 (__a);
18826 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
18827 vmovq_n_p8 (poly8_t __a)
18829 return vdupq_n_p8 (__a);
18832 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
18833 vmovq_n_p16 (poly16_t __a)
18835 return vdupq_n_p16 (__a);
18838 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
18839 vmovq_n_s8 (int8_t __a)
18841 return vdupq_n_s8 (__a);
18844 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
18845 vmovq_n_s16 (int16_t __a)
18847 return vdupq_n_s16 (__a);
18850 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
18851 vmovq_n_s32 (int32_t __a)
18853 return vdupq_n_s32 (__a);
18856 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
18857 vmovq_n_s64 (int64_t __a)
18859 return vdupq_n_s64 (__a);
18862 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
18863 vmovq_n_u8 (uint8_t __a)
18865 return vdupq_n_u8 (__a);
18868 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
18869 vmovq_n_u16 (uint16_t __a)
18871 return vdupq_n_u16 (__a);
18874 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
18875 vmovq_n_u32 (uint32_t __a)
18877 return vdupq_n_u32 (__a);
18880 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
18881 vmovq_n_u64 (uint64_t __a)
18883 return vdupq_n_u64 (__a);
18886 /* vmul_lane */
18888 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18889 vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane)
18891 return __a * __aarch64_vget_lane_f32 (__b, __lane);
18894 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18895 vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane)
18897 return __a * __b;
18900 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18901 vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane)
18903 return __a * __aarch64_vget_lane_s16 (__b, __lane);
18906 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18907 vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane)
18909 return __a * __aarch64_vget_lane_s32 (__b, __lane);
18912 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18913 vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane)
18915 return __a * __aarch64_vget_lane_u16 (__b, __lane);
18918 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18919 vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
18921 return __a * __aarch64_vget_lane_u32 (__b, __lane);
18924 /* vmuld_lane */
18926 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18927 vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
18929 return __a * vget_lane_f64 (__b, __lane);
18932 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
18933 vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
18935 return __a * vgetq_lane_f64 (__b, __lane);
18938 /* vmuls_lane */
18940 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18941 vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
18943 return __a * vget_lane_f32 (__b, __lane);
18946 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
18947 vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
18949 return __a * vgetq_lane_f32 (__b, __lane);
18952 /* vmul_laneq */
18954 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
18955 vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane)
18957 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
18960 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18961 vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane)
18963 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
18966 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
18967 vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane)
18969 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
18972 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
18973 vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane)
18975 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
18978 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
18979 vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane)
18981 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
18984 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
18985 vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
18987 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
18990 /* vmul_n */
18992 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
18993 vmul_n_f64 (float64x1_t __a, float64_t __b)
18995 return (float64x1_t) { vget_lane_f64 (__a, 0) * __b };
18998 /* vmulq_lane */
19000 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19001 vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane)
19003 return __a * __aarch64_vget_lane_f32 (__b, __lane);
19006 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19007 vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane)
19009 __builtin_aarch64_im_lane_boundsi (__lane, 1);
19010 return __a * __b[0];
19013 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19014 vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane)
19016 return __a * __aarch64_vget_lane_s16 (__b, __lane);
19019 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19020 vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane)
19022 return __a * __aarch64_vget_lane_s32 (__b, __lane);
19025 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19026 vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane)
19028 return __a * __aarch64_vget_lane_u16 (__b, __lane);
19031 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19032 vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane)
19034 return __a * __aarch64_vget_lane_u32 (__b, __lane);
19037 /* vmulq_laneq */
19039 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19040 vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane)
19042 return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
19045 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19046 vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane)
19048 return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
19051 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19052 vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane)
19054 return __a * __aarch64_vgetq_lane_s16 (__b, __lane);
19057 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19058 vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane)
19060 return __a * __aarch64_vgetq_lane_s32 (__b, __lane);
19063 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
19064 vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane)
19066 return __a * __aarch64_vgetq_lane_u16 (__b, __lane);
19069 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
19070 vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
19072 return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
19075 /* vneg */
19077 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
19078 vneg_f32 (float32x2_t __a)
19080 return -__a;
19083 __extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
19084 vneg_f64 (float64x1_t __a)
19086 return -__a;
19089 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19090 vneg_s8 (int8x8_t __a)
19092 return -__a;
19095 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19096 vneg_s16 (int16x4_t __a)
19098 return -__a;
19101 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19102 vneg_s32 (int32x2_t __a)
19104 return -__a;
19107 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19108 vneg_s64 (int64x1_t __a)
19110 return -__a;
19113 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
19114 vnegq_f32 (float32x4_t __a)
19116 return -__a;
19119 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
19120 vnegq_f64 (float64x2_t __a)
19122 return -__a;
19125 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
19126 vnegq_s8 (int8x16_t __a)
19128 return -__a;
19131 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19132 vnegq_s16 (int16x8_t __a)
19134 return -__a;
19137 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19138 vnegq_s32 (int32x4_t __a)
19140 return -__a;
19143 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19144 vnegq_s64 (int64x2_t __a)
19146 return -__a;
19149 /* vpadd */
19151 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19152 vpadd_s8 (int8x8_t __a, int8x8_t __b)
19154 return __builtin_aarch64_addpv8qi (__a, __b);
19157 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19158 vpadd_s16 (int16x4_t __a, int16x4_t __b)
19160 return __builtin_aarch64_addpv4hi (__a, __b);
19163 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19164 vpadd_s32 (int32x2_t __a, int32x2_t __b)
19166 return __builtin_aarch64_addpv2si (__a, __b);
19169 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19170 vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
19172 return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a,
19173 (int8x8_t) __b);
19176 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19177 vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
19179 return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a,
19180 (int16x4_t) __b);
19183 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19184 vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
19186 return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a,
19187 (int32x2_t) __b);
19190 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
19191 vpaddd_f64 (float64x2_t __a)
19193 return vgetq_lane_f64 (__builtin_aarch64_reduc_splus_v2df (__a), 0);
19196 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19197 vpaddd_s64 (int64x2_t __a)
19199 return __builtin_aarch64_addpdi (__a);
19202 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19203 vpaddd_u64 (uint64x2_t __a)
19205 return __builtin_aarch64_addpdi ((int64x2_t) __a);
19208 /* vqabs */
19210 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19211 vqabsq_s64 (int64x2_t __a)
19213 return (int64x2_t) __builtin_aarch64_sqabsv2di (__a);
19216 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19217 vqabsb_s8 (int8x1_t __a)
19219 return (int8x1_t) __builtin_aarch64_sqabsqi (__a);
19222 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19223 vqabsh_s16 (int16x1_t __a)
19225 return (int16x1_t) __builtin_aarch64_sqabshi (__a);
19228 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19229 vqabss_s32 (int32x1_t __a)
19231 return (int32x1_t) __builtin_aarch64_sqabssi (__a);
19234 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19235 vqabsd_s64 (int64_t __a)
19237 return __builtin_aarch64_sqabsdi (__a);
19240 /* vqadd */
19242 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19243 vqaddb_s8 (int8x1_t __a, int8x1_t __b)
19245 return (int8x1_t) __builtin_aarch64_sqaddqi (__a, __b);
19248 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19249 vqaddh_s16 (int16x1_t __a, int16x1_t __b)
19251 return (int16x1_t) __builtin_aarch64_sqaddhi (__a, __b);
19254 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19255 vqadds_s32 (int32x1_t __a, int32x1_t __b)
19257 return (int32x1_t) __builtin_aarch64_sqaddsi (__a, __b);
19260 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19261 vqaddd_s64 (int64_t __a, int64_t __b)
19263 return __builtin_aarch64_sqadddi (__a, __b);
19266 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
19267 vqaddb_u8 (uint8x1_t __a, uint8x1_t __b)
19269 return (uint8x1_t) __builtin_aarch64_uqaddqi_uuu (__a, __b);
19272 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
19273 vqaddh_u16 (uint16x1_t __a, uint16x1_t __b)
19275 return (uint16x1_t) __builtin_aarch64_uqaddhi_uuu (__a, __b);
19278 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
19279 vqadds_u32 (uint32x1_t __a, uint32x1_t __b)
19281 return (uint32x1_t) __builtin_aarch64_uqaddsi_uuu (__a, __b);
19284 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
19285 vqaddd_u64 (uint64_t __a, uint64_t __b)
19287 return __builtin_aarch64_uqadddi_uuu (__a, __b);
19290 /* vqdmlal */
19292 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19293 vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19295 return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c);
19298 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19299 vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19301 return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c);
19304 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19305 vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19306 int const __d)
19308 return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d);
19311 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19312 vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19313 int const __d)
19315 return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d);
19318 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19319 vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19321 return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c);
19324 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19325 vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19327 return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d);
19330 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19331 vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19333 return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d);
19336 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19337 vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19339 return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c);
19342 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19343 vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19345 return __builtin_aarch64_sqdmlalv2si (__a, __b, __c);
19348 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19349 vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19351 return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c);
19354 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19355 vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19356 int const __d)
19358 return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d);
19361 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19362 vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19363 int const __d)
19365 return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d);
19368 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19369 vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19371 return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c);
19374 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19375 vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19377 return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d);
19380 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19381 vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19383 return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d);
19386 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19387 vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19389 return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c);
19392 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19393 vqdmlalh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
19395 return __builtin_aarch64_sqdmlalhi (__a, __b, __c);
19398 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19399 vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
19401 return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
19404 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19405 vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19407 return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
19410 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19411 vqdmlals_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
19413 return __builtin_aarch64_sqdmlalsi (__a, __b, __c);
19416 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19417 vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19419 return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d);
19422 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19423 vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19425 return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
19428 /* vqdmlsl */
19430 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19431 vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
19433 return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c);
19436 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19437 vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c)
19439 return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c);
19442 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19443 vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c,
19444 int const __d)
19446 return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d);
19449 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19450 vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c,
19451 int const __d)
19453 return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d);
19456 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19457 vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c)
19459 return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c);
19462 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19463 vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d)
19465 return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d);
19468 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19469 vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d)
19471 return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d);
19474 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19475 vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c)
19477 return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c);
19480 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19481 vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
19483 return __builtin_aarch64_sqdmlslv2si (__a, __b, __c);
19486 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19487 vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c)
19489 return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c);
19492 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19493 vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c,
19494 int const __d)
19496 return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d);
19499 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19500 vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c,
19501 int const __d)
19503 return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d);
19506 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19507 vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c)
19509 return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c);
19512 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19513 vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d)
19515 return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d);
19518 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19519 vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d)
19521 return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d);
19524 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19525 vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c)
19527 return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c);
19530 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19531 vqdmlslh_s16 (int32x1_t __a, int16x1_t __b, int16x1_t __c)
19533 return __builtin_aarch64_sqdmlslhi (__a, __b, __c);
19536 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19537 vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
19539 return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
19542 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19543 vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
19545 return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
19548 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19549 vqdmlsls_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
19551 return __builtin_aarch64_sqdmlslsi (__a, __b, __c);
19554 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19555 vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d)
19557 return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d);
19560 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19561 vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
19563 return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
19566 /* vqdmulh */
19568 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19569 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19571 return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
19574 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19575 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19577 return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
19580 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19581 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19583 return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
19586 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19587 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19589 return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
19592 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19593 vqdmulhh_s16 (int16x1_t __a, int16x1_t __b)
19595 return (int16x1_t) __builtin_aarch64_sqdmulhhi (__a, __b);
19598 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19599 vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
19601 return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
19604 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19605 vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19607 return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
19610 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19611 vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
19613 return (int32x1_t) __builtin_aarch64_sqdmulhsi (__a, __b);
19616 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19617 vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
19619 return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
19622 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19623 vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19625 return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
19628 /* vqdmull */
19630 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19631 vqdmull_s16 (int16x4_t __a, int16x4_t __b)
19633 return __builtin_aarch64_sqdmullv4hi (__a, __b);
19636 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19637 vqdmull_high_s16 (int16x8_t __a, int16x8_t __b)
19639 return __builtin_aarch64_sqdmull2v8hi (__a, __b);
19642 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19643 vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c)
19645 return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c);
19648 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19649 vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c)
19651 return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c);
19654 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19655 vqdmull_high_n_s16 (int16x8_t __a, int16_t __b)
19657 return __builtin_aarch64_sqdmull2_nv8hi (__a, __b);
19660 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19661 vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c)
19663 return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c);
19666 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19667 vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c)
19669 return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c);
19672 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19673 vqdmull_n_s16 (int16x4_t __a, int16_t __b)
19675 return __builtin_aarch64_sqdmull_nv4hi (__a, __b);
19678 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19679 vqdmull_s32 (int32x2_t __a, int32x2_t __b)
19681 return __builtin_aarch64_sqdmullv2si (__a, __b);
19684 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19685 vqdmull_high_s32 (int32x4_t __a, int32x4_t __b)
19687 return __builtin_aarch64_sqdmull2v4si (__a, __b);
19690 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19691 vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c)
19693 return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c);
19696 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19697 vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c)
19699 return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c);
19702 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19703 vqdmull_high_n_s32 (int32x4_t __a, int32_t __b)
19705 return __builtin_aarch64_sqdmull2_nv4si (__a, __b);
19708 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19709 vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c)
19711 return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c);
19714 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19715 vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c)
19717 return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c);
19720 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19721 vqdmull_n_s32 (int32x2_t __a, int32_t __b)
19723 return __builtin_aarch64_sqdmull_nv2si (__a, __b);
19726 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19727 vqdmullh_s16 (int16x1_t __a, int16x1_t __b)
19729 return (int32x1_t) __builtin_aarch64_sqdmullhi (__a, __b);
19732 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19733 vqdmullh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
19735 return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c);
19738 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19739 vqdmulls_s32 (int32x1_t __a, int32x1_t __b)
19741 return __builtin_aarch64_sqdmullsi (__a, __b);
19744 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19745 vqdmulls_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
19747 return (int64x1_t) {__builtin_aarch64_sqdmull_lanesi (__a, __b, __c)};
19750 /* vqmovn */
19752 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19753 vqmovn_s16 (int16x8_t __a)
19755 return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a);
19758 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19759 vqmovn_s32 (int32x4_t __a)
19761 return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a);
19764 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19765 vqmovn_s64 (int64x2_t __a)
19767 return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a);
19770 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19771 vqmovn_u16 (uint16x8_t __a)
19773 return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a);
19776 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19777 vqmovn_u32 (uint32x4_t __a)
19779 return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a);
19782 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19783 vqmovn_u64 (uint64x2_t __a)
19785 return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a);
19788 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19789 vqmovnh_s16 (int16x1_t __a)
19791 return (int8x1_t) __builtin_aarch64_sqmovnhi (__a);
19794 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19795 vqmovns_s32 (int32x1_t __a)
19797 return (int16x1_t) __builtin_aarch64_sqmovnsi (__a);
19800 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19801 vqmovnd_s64 (int64_t __a)
19803 return (int32x1_t) __builtin_aarch64_sqmovndi (__a);
19806 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
19807 vqmovnh_u16 (uint16x1_t __a)
19809 return (uint8x1_t) __builtin_aarch64_uqmovnhi (__a);
19812 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
19813 vqmovns_u32 (uint32x1_t __a)
19815 return (uint16x1_t) __builtin_aarch64_uqmovnsi (__a);
19818 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
19819 vqmovnd_u64 (uint64_t __a)
19821 return (uint32x1_t) __builtin_aarch64_uqmovndi (__a);
19824 /* vqmovun */
19826 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19827 vqmovun_s16 (int16x8_t __a)
19829 return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a);
19832 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19833 vqmovun_s32 (int32x4_t __a)
19835 return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a);
19838 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19839 vqmovun_s64 (int64x2_t __a)
19841 return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a);
19844 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19845 vqmovunh_s16 (int16x1_t __a)
19847 return (int8x1_t) __builtin_aarch64_sqmovunhi (__a);
19850 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19851 vqmovuns_s32 (int32x1_t __a)
19853 return (int16x1_t) __builtin_aarch64_sqmovunsi (__a);
19856 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19857 vqmovund_s64 (int64_t __a)
19859 return (int32x1_t) __builtin_aarch64_sqmovundi (__a);
19862 /* vqneg */
19864 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
19865 vqnegq_s64 (int64x2_t __a)
19867 return (int64x2_t) __builtin_aarch64_sqnegv2di (__a);
19870 __extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
19871 vqnegb_s8 (int8x1_t __a)
19873 return (int8x1_t) __builtin_aarch64_sqnegqi (__a);
19876 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19877 vqnegh_s16 (int16x1_t __a)
19879 return (int16x1_t) __builtin_aarch64_sqneghi (__a);
19882 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19883 vqnegs_s32 (int32x1_t __a)
19885 return (int32x1_t) __builtin_aarch64_sqnegsi (__a);
19888 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
19889 vqnegd_s64 (int64_t __a)
19891 return __builtin_aarch64_sqnegdi (__a);
19894 /* vqrdmulh */
19896 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19897 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
19899 return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
19902 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19903 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
19905 return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
19908 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
19909 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
19911 return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
19914 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
19915 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
19917 return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
19920 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19921 vqrdmulhh_s16 (int16x1_t __a, int16x1_t __b)
19923 return (int16x1_t) __builtin_aarch64_sqrdmulhhi (__a, __b);
19926 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
19927 vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
19929 return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
19932 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
19933 vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
19935 return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
19938 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19939 vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
19941 return (int32x1_t) __builtin_aarch64_sqrdmulhsi (__a, __b);
19944 __extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
19945 vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
19947 return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
19950 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
19951 vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
19953 return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
19956 /* vqrshl */
19958 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
19959 vqrshl_s8 (int8x8_t __a, int8x8_t __b)
19961 return __builtin_aarch64_sqrshlv8qi (__a, __b);
19964 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
19965 vqrshl_s16 (int16x4_t __a, int16x4_t __b)
19967 return __builtin_aarch64_sqrshlv4hi (__a, __b);
19970 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
19971 vqrshl_s32 (int32x2_t __a, int32x2_t __b)
19973 return __builtin_aarch64_sqrshlv2si (__a, __b);
19976 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
19977 vqrshl_s64 (int64x1_t __a, int64x1_t __b)
19979 return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])};
19982 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
19983 vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
19985 return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b);
19988 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
19989 vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
19991 return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b);
19994 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
19995 vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
19997 return __builtin_aarch64_uqrshlv2si_uus ( __a, __b);
20000 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
20001 vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
20003 return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])};
20006 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20007 vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
20009 return __builtin_aarch64_sqrshlv16qi (__a, __b);
20012 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20013 vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
20015 return __builtin_aarch64_sqrshlv8hi (__a, __b);
20018 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
20019 vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
20021 return __builtin_aarch64_sqrshlv4si (__a, __b);
20024 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
20025 vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
20027 return __builtin_aarch64_sqrshlv2di (__a, __b);
20030 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20031 vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
20033 return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b);
20036 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20037 vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
20039 return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b);
20042 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
20043 vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
20045 return __builtin_aarch64_uqrshlv4si_uus ( __a, __b);
20048 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
20049 vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
20051 return __builtin_aarch64_uqrshlv2di_uus ( __a, __b);
/* vqrshl scalar forms: saturating rounding shift left of a single
   element; the suffix selects the width (b = 8, h = 16, s = 32,
   d = 64 bits).  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqrshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqrshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqrshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqrshldi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  /* "_uus": unsigned result and data operand, signed shift operand.  */
  return __builtin_aarch64_uqrshlqi_uus (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return __builtin_aarch64_uqrshlhi_uus (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return __builtin_aarch64_uqrshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqrshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqrshldi_uus (__a, __b);
}
/* vqrshrn */

/* Saturating rounding shift right narrow: shift each lane of __a right
   by the immediate __b, round, then narrow to half-width lanes with
   saturation.  The casts on the signed forms only reinterpret the
   builtin's result type; they do not change any bits.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b);
}

/* Scalar forms of the same operation.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqrshrn_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqrshrn_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqrshrn_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqrshrnh_n_u16 (uint16x1_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqrshrns_n_u32 (uint32x1_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqrshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b);
}
/* vqrshrun */

/* Saturating rounding shift right unsigned narrow: like vqrshrn, but
   the signed input saturates to an UNSIGNED half-width result.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqrshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqrshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqrshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b);
}

/* NOTE(review): ACLE documents the scalar vqrshrun forms as returning
   unsigned types (uint8_t etc.); these return signed int*x1_t — confirm
   against the intrinsics specification before relying on the sign.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqrshrunh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqrshrun_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqrshruns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqrshrun_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrshrund_n_s64 (int64_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqrshrun_ndi (__a, __b);
}
/* vqshl */

/* Saturating shift left by a per-lane (signed) register amount.
   64-bit D-register forms, then 128-bit Q-register forms, then
   scalar forms.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sqshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sqshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sqshlv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_s64 (int64x1_t __a, int64x1_t __b)
{
  /* No V1DI builtin: extract lane 0, use the scalar DImode builtin,
     and rebuild the one-element vector with a brace initializer.  */
  return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  /* "_uus": unsigned result and data operand, signed shift operand.  */
  return __builtin_aarch64_uqshlv8qi_uus ( __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_uqshlv4hi_uus ( __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_uqshlv2si_uus ( __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sqshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sqshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sqshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sqshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_uqshlv16qi_uus ( __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_uqshlv8hi_uus ( __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_uqshlv4si_uus ( __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_uqshlv2di_uus ( __a, __b);
}

/* Scalar forms.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_s8 (int8x1_t __a, int8x1_t __b)
{
  return __builtin_aarch64_sqshlqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_s16 (int16x1_t __a, int16x1_t __b)
{
  return __builtin_aarch64_sqshlhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_s32 (int32x1_t __a, int32x1_t __b)
{
  return __builtin_aarch64_sqshlsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqshldi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  return __builtin_aarch64_uqshlqi_uus (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return __builtin_aarch64_uqshlhi_uus (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return __builtin_aarch64_uqshlsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqshldi_uus (__a, __b);
}
/* vqshl_n: saturating shift left by an immediate.  The "_n" builtins
   require __b to be an integer constant expression; the builtin
   expander rejects out-of-range values.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vqshl_n_s64 (int64x1_t __a, const int __b)
{
  /* One-element vector: go via the scalar DImode builtin.  */
  return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshl_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshl_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshl_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vqshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vqshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nv2di_uus (__a, __b);
}

/* Scalar forms.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlb_n_s8 (int8x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshl_nqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshlh_n_s16 (int16x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshl_nhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshls_n_s32 (int32x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshl_nsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshl_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshlb_n_u8 (uint8x1_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nqi_uus (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshlh_n_u16 (uint16x1_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nhi_uus (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshls_n_u32 (uint32x1_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_nsi_uus (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshld_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshl_ndi_uus (__a, __b);
}
/* vqshlu */

/* Saturating shift left unsigned (by immediate): a SIGNED input is
   shifted left and saturated to an UNSIGNED result.  "_uss" builtin
   suffix: unsigned result, signed data and shift operands.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshlu_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshlu_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshlu_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vqshlu_n_s64 (int64x1_t __a, const int __b)
{
  /* One-element vector: scalar DImode builtin on lane 0.  */
  return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vqshluq_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vqshluq_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vqshluq_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vqshluq_n_s64 (int64x2_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b);
}

/* NOTE(review): ACLE documents the scalar vqshlu forms (except the
   64-bit one below) as returning unsigned types; these return signed
   int*x1_t — confirm against the intrinsics specification.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshlub_n_s8 (int8x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshluh_n_s16 (int16x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshlus_n_s32 (int32x1_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqshlud_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_sqshlu_ndi_uss (__a, __b);
}
/* vqshrn */

/* Saturating shift right narrow (truncating, no rounding): shift each
   lane of __a right by the immediate __b and narrow to half-width
   lanes with saturation.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vqshrn_n_s16 (int16x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqshrn_n_s32 (int32x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqshrn_n_s64 (int64x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrn_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrn_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrn_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b);
}

/* Scalar forms.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshrnh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshrn_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshrns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshrn_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshrnd_n_s64 (int64_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshrn_ndi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqshrnh_n_u16 (uint16x1_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nhi_uus (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqshrns_n_u32 (uint32x1_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_nsi_uus (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqshrnd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_uqshrn_ndi_uus (__a, __b);
}
/* vqshrun */

/* Saturating shift right unsigned narrow: signed input, shifted right
   by the immediate __b, saturated to an UNSIGNED half-width result.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vqshrun_n_s16 (int16x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vqshrun_n_s32 (int32x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vqshrun_n_s64 (int64x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b);
}

/* NOTE(review): ACLE documents the scalar vqshrun forms as returning
   unsigned types; these return signed int*x1_t — confirm against the
   intrinsics specification before relying on the sign.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqshrunh_n_s16 (int16x1_t __a, const int __b)
{
  return (int8x1_t) __builtin_aarch64_sqshrun_nhi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqshruns_n_s32 (int32x1_t __a, const int __b)
{
  return (int16x1_t) __builtin_aarch64_sqshrun_nsi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqshrund_n_s64 (int64_t __a, const int __b)
{
  return (int32x1_t) __builtin_aarch64_sqshrun_ndi (__a, __b);
}
/* vqsub */

/* Saturating subtract, scalar forms: __a - __b with the result clamped
   to the range of the element type instead of wrapping.  */

__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vqsubb_s8 (int8x1_t __a, int8x1_t __b)
{
  return (int8x1_t) __builtin_aarch64_sqsubqi (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vqsubh_s16 (int16x1_t __a, int16x1_t __b)
{
  return (int16x1_t) __builtin_aarch64_sqsubhi (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqsubs_s32 (int32x1_t __a, int32x1_t __b)
{
  return (int32x1_t) __builtin_aarch64_sqsubsi (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqsubd_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sqsubdi (__a, __b);
}

__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vqsubb_u8 (uint8x1_t __a, uint8x1_t __b)
{
  /* "_uuu": result and both operands unsigned.  */
  return (uint8x1_t) __builtin_aarch64_uqsubqi_uuu (__a, __b);
}

__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vqsubh_u16 (uint16x1_t __a, uint16x1_t __b)
{
  return (uint16x1_t) __builtin_aarch64_uqsubhi_uuu (__a, __b);
}

__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vqsubs_u32 (uint32x1_t __a, uint32x1_t __b)
{
  return (uint32x1_t) __builtin_aarch64_uqsubsi_uuu (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vqsubd_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_uqsubdi_uuu (__a, __b);
}
/* vrbit */

/* Reverse the bit order within each byte element (RBIT).  The builtin
   is declared only for the signed QI vector modes, so the poly and
   unsigned variants cast in and out; the casts reinterpret bits only.  */

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrbit_p8 (poly8x8_t __a)
{
  return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrbit_s8 (int8x8_t __a)
{
  return __builtin_aarch64_rbitv8qi (__a);
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrbit_u8 (uint8x8_t __a)
{
  return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a);
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vrbitq_p8 (poly8x16_t __a)
{
  return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a);
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrbitq_s8 (int8x16_t __a)
{
  return __builtin_aarch64_rbitv16qi (__a);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrbitq_u8 (uint8x16_t __a)
{
  return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a);
}
/* vrecpe */

/* Floating-point reciprocal estimate (FRECPE builtins): an approximate
   1/__a, typically refined with vrecps steps.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpesf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpedf (__a);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frecpev2sf (__a);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frecpev4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frecpev2df (__a);
}
/* vrecps */

/* Floating-point reciprocal step (FRECPS builtins): the Newton-Raphson
   refinement step used together with vrecpe to improve a reciprocal
   estimate.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b)
{
  return __builtin_aarch64_frecpssf (__a, __b);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t __a, float64_t __b)
{
  return __builtin_aarch64_frecpsdf (__a, __b);
}

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t __a, float32x2_t __b)
{
  return __builtin_aarch64_frecpsv2sf (__a, __b);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
  return __builtin_aarch64_frecpsv4sf (__a, __b);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
{
  return __builtin_aarch64_frecpsv2df (__a, __b);
}
/* vrecpx */

/* Floating-point reciprocal exponent (FRECPX builtins), scalar only.  */

__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t __a)
{
  return __builtin_aarch64_frecpxsf (__a);
}

__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t __a)
{
  return __builtin_aarch64_frecpxdf (__a);
}
20857 /* vrev */
20859 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20860 vrev16_p8 (poly8x8_t a)
20862 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20865 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20866 vrev16_s8 (int8x8_t a)
20868 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20871 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20872 vrev16_u8 (uint8x8_t a)
20874 return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20877 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20878 vrev16q_p8 (poly8x16_t a)
20880 return __builtin_shuffle (a,
20881 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20884 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20885 vrev16q_s8 (int8x16_t a)
20887 return __builtin_shuffle (a,
20888 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20891 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20892 vrev16q_u8 (uint8x16_t a)
20894 return __builtin_shuffle (a,
20895 (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 });
20898 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20899 vrev32_p8 (poly8x8_t a)
20901 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20904 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20905 vrev32_p16 (poly16x4_t a)
20907 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20910 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20911 vrev32_s8 (int8x8_t a)
20913 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20916 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20917 vrev32_s16 (int16x4_t a)
20919 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20922 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
20923 vrev32_u8 (uint8x8_t a)
20925 return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
20928 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
20929 vrev32_u16 (uint16x4_t a)
20931 return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 });
20934 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
20935 vrev32q_p8 (poly8x16_t a)
20937 return __builtin_shuffle (a,
20938 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20941 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
20942 vrev32q_p16 (poly16x8_t a)
20944 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20947 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
20948 vrev32q_s8 (int8x16_t a)
20950 return __builtin_shuffle (a,
20951 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20954 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
20955 vrev32q_s16 (int16x8_t a)
20957 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20960 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
20961 vrev32q_u8 (uint8x16_t a)
20963 return __builtin_shuffle (a,
20964 (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 });
20967 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
20968 vrev32q_u16 (uint16x8_t a)
20970 return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 });
20973 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
20974 vrev64_f32 (float32x2_t a)
20976 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
20979 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
20980 vrev64_p8 (poly8x8_t a)
20982 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20985 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
20986 vrev64_p16 (poly16x4_t a)
20988 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
20991 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
20992 vrev64_s8 (int8x8_t a)
20994 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
20997 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
20998 vrev64_s16 (int16x4_t a)
21000 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21003 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
21004 vrev64_s32 (int32x2_t a)
21006 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21009 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
21010 vrev64_u8 (uint8x8_t a)
21012 return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 });
21015 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
21016 vrev64_u16 (uint16x4_t a)
21018 return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 });
21021 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
21022 vrev64_u32 (uint32x2_t a)
21024 return __builtin_shuffle (a, (uint32x2_t) { 1, 0 });
21027 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
21028 vrev64q_f32 (float32x4_t a)
21030 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21033 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
21034 vrev64q_p8 (poly8x16_t a)
21036 return __builtin_shuffle (a,
21037 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21040 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
21041 vrev64q_p16 (poly16x8_t a)
21043 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21046 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
21047 vrev64q_s8 (int8x16_t a)
21049 return __builtin_shuffle (a,
21050 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21053 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
21054 vrev64q_s16 (int16x8_t a)
21056 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21059 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
21060 vrev64q_s32 (int32x4_t a)
21062 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
21065 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
21066 vrev64q_u8 (uint8x16_t a)
21068 return __builtin_shuffle (a,
21069 (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 });
21072 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
21073 vrev64q_u16 (uint16x8_t a)
21075 return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 });
21078 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
21079 vrev64q_u32 (uint32x4_t a)
21081 return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 });
/* vrnd */

/* Round to integral, toward zero (truncation).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnd_f32 (float32x2_t __a)
{
  return __builtin_aarch64_btruncv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnd_f64 (float64x1_t __a)
{
  /* Single-lane type: apply the scalar trunc builtin to lane 0 and
     re-insert the result.  */
  return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_btruncv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_btruncv2df (__a);
}
/* vrnda */

/* Round to integral, to nearest with ties away from zero (the C
   round() semantics of the underlying builtins).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrnda_f32 (float32x2_t __a)
{
  return __builtin_aarch64_roundv2sf (__a);
}

__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrnda_f64 (float64x1_t __a)
{
  /* Single-lane type: scalar round builtin on lane 0.  */
  return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndaq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_roundv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndaq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_roundv2df (__a);
}
/* vrndi — round each element to integral using the current FP rounding
   mode, without raising the inexact exception (C nearbyint()
   semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndi_f32 (float32x2_t __a)
{
  return __builtin_aarch64_nearbyintv2sf (__a);
}

/* Single-element float64x1_t: round lane 0 via the scalar builtin.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndi_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndiq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_nearbyintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndiq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_nearbyintv2df (__a);
}
/* vrndm — round each element to integral, towards minus infinity
   (C floor() semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndm_f32 (float32x2_t __a)
{
  return __builtin_aarch64_floorv2sf (__a);
}

/* Single-element float64x1_t: floor lane 0 via the scalar builtin.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndm_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndmq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_floorv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndmq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_floorv2df (__a);
}
/* vrndn — round each element to the nearest integral value with ties
   to even (FRINTN).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndn_f32 (float32x2_t __a)
{
  return __builtin_aarch64_frintnv2sf (__a);
}

/* Single-element float64x1_t: round lane 0 via the scalar DF builtin.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndn_f64 (float64x1_t __a)
{
  return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])};
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndnq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_frintnv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndnq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_frintnv2df (__a);
}
/* vrndp — round each element to integral, towards plus infinity
   (C ceil() semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndp_f32 (float32x2_t __a)
{
  return __builtin_aarch64_ceilv2sf (__a);
}

/* Single-element float64x1_t: ceil lane 0 via the scalar builtin.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndp_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndpq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_ceilv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndpq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_ceilv2df (__a);
}
/* vrndx — round each element to integral using the current FP rounding
   mode, raising the inexact exception when the value changes
   (C rint() semantics).  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrndx_f32 (float32x2_t __a)
{
  return __builtin_aarch64_rintv2sf (__a);
}

/* Single-element float64x1_t: round lane 0 via the scalar builtin.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vrndx_f64 (float64x1_t __a)
{
  return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrndxq_f32 (float32x4_t __a)
{
  return __builtin_aarch64_rintv4sf (__a);
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrndxq_f64 (float64x2_t __a)
{
  return __builtin_aarch64_rintv2df (__a);
}
/* vrshl — rounding shift left by a per-element signed shift count
   (SRSHL/URSHL).  Per ACLE, a negative count in __b shifts the
   corresponding element of __a right with rounding.  The _uus-suffixed
   builtins take an unsigned first operand, a signed shift vector, and
   return unsigned.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
}

/* Single-element variant: operate on lane 0 via the DI scalar builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_urshlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_urshlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_urshlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_urshlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_urshlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_urshlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_urshlv2di_uus (__a, __b);
}

/* Scalar (d-register) forms.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_srshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshld_u64 (uint64_t __a, int64_t __b)
{
  return __builtin_aarch64_urshldi_uus (__a, __b);
}
/* vrshr — rounding shift right by an immediate count (SRSHR/URSHR).
   __b must be a constant in range for the element width; the builtins
   enforce this at expand time.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
}

/* Single-element variant: operate on lane 0 via the DI scalar builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrshr_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrshr_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrshr_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
}

/* Scalar (d-register) forms.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_srshr_ndi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_urshr_ndi_uus (__a, __b);
}
/* vrsra — rounding shift right by immediate and accumulate
   (SRSRA/URSRA): __a + (__b rounding-shifted right by __c).  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
}

/* Single-element variant: operate on lane 0 via the DI scalar builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
}

/* Scalar (d-register) forms.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_srsra_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
}
#ifdef __ARM_FEATURE_CRYPTO

/* vsha1 — SHA-1 hash-update intrinsics, available only when the
   Crypto extension is enabled.  The c/m/p variants select the SHA1C
   (choose), SHA1M (majority) and SHA1P (parity) round functions.  */

static __inline uint32x4_t
vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
}

static __inline uint32x4_t
vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
}

/* SHA1H: fixed rotate of the E working value.  */
static __inline uint32_t
vsha1h_u32 (uint32_t hash_e)
{
  return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
}

/* SHA1SU0/SHA1SU1: message schedule update, parts 1 and 2.  */
static __inline uint32x4_t
vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
{
  return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
}

static __inline uint32x4_t
vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
}

/* SHA256H/SHA256H2: SHA-256 hash update.  Note the operand order of
   the h2 form: the EFGH state comes first.  */
static __inline uint32x4_t
vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
}

static __inline uint32x4_t
vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
{
  return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
}

/* SHA256SU0/SHA256SU1: message schedule update, parts 1 and 2.  */
static __inline uint32x4_t
vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
{
  return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
}

static __inline uint32x4_t
vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
{
  return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
}

/* Polynomial (carry-less) 64x64 -> 128-bit multiply (PMULL/PMULL2).  */
static __inline poly128_t
vmull_p64 (poly64_t a, poly64_t b)
{
  return __builtin_aarch64_crypto_pmulldi_ppp (a, b);
}

/* As above, but multiplying the high halves of the two operands.  */
static __inline poly128_t
vmull_high_p64 (poly64x2_t a, poly64x2_t b)
{
  return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
}

#endif
/* vshl — shift left.  The _n forms shift every element left by the
   immediate __b (SHL).  There is no separate unsigned left-shift
   builtin, so the unsigned variants cast through the signed type;
   left shift is bit-identical for both signednesses.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
}

/* Single-element variant: operate on lane 0 via the DI scalar builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
}

/* Scalar (d-register) forms.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_n_u64 (uint64_t __a, const int __b)
{
  return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
}
/* Register-operand vshl — shift each element of __a left by the
   corresponding signed element of __b (SSHL/USHL); per ACLE a negative
   count shifts right.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshl_s8 (int8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_sshlv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshl_s16 (int16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_sshlv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshl_s32 (int32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_sshlv2si (__a, __b);
}

/* Single-element variant: operate on lane 0 via the DI scalar builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshl_s64 (int64x1_t __a, int64x1_t __b)
{
  return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshl_u8 (uint8x8_t __a, int8x8_t __b)
{
  return __builtin_aarch64_ushlv8qi_uus (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshl_u16 (uint16x4_t __a, int16x4_t __b)
{
  return __builtin_aarch64_ushlv4hi_uus (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshl_u32 (uint32x2_t __a, int32x2_t __b)
{
  return __builtin_aarch64_ushlv2si_uus (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshl_u64 (uint64x1_t __a, int64x1_t __b)
{
  return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshlq_s8 (int8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_sshlv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshlq_s16 (int16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_sshlv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshlq_s32 (int32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_sshlv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshlq_s64 (int64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_sshlv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshlq_u8 (uint8x16_t __a, int8x16_t __b)
{
  return __builtin_aarch64_ushlv16qi_uus (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshlq_u16 (uint16x8_t __a, int16x8_t __b)
{
  return __builtin_aarch64_ushlv8hi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshlq_u32 (uint32x4_t __a, int32x4_t __b)
{
  return __builtin_aarch64_ushlv4si_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshlq_u64 (uint64x2_t __a, int64x2_t __b)
{
  return __builtin_aarch64_ushlv2di_uus (__a, __b);
}

/* Scalar (d-register) forms.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshld_s64 (int64_t __a, int64_t __b)
{
  return __builtin_aarch64_sshldi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshld_u64 (uint64_t __a, uint64_t __b)
{
  return __builtin_aarch64_ushldi_uus (__a, __b);
}
/* vshll — widening shift left by immediate (SSHLL/USHLL): each element
   is sign-/zero-extended to twice its width and then shifted left by
   __b.  The _high forms widen the upper half of a q-register
   (SSHLL2/USHLL2).  */

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_high_n_s8 (int8x16_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv16qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_high_n_s16 (int16x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv8hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_high_n_s32 (int32x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll2_nv4si (__a, __b);
}

/* The ushll2 builtins take/return signed vector types, so the unsigned
   variants cast on the way in and out.  */
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_high_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_high_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_high_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshll_n_s8 (int8x8_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv8qi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshll_n_s16 (int16x4_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv4hi (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshll_n_s32 (int32x2_t __a, const int __b)
{
  return __builtin_aarch64_sshll_nv2si (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshll_n_u8 (uint8x8_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshll_n_u16 (uint16x4_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshll_n_u32 (uint32x2_t __a, const int __b)
{
  return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
}
/* vshr — shift right by immediate: arithmetic for the signed variants
   (SSHR) and logical for the unsigned ones (USHR), hence the cast
   through the signed type around the lshr builtins.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vshr_n_s8 (int8x8_t __a, const int __b)
{
  return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vshr_n_s16 (int16x4_t __a, const int __b)
{
  return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vshr_n_s32 (int32x2_t __a, const int __b)
{
  return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
}

/* Single-element variant: operate on lane 0 via the scalar builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vshr_n_s64 (int64x1_t __a, const int __b)
{
  return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vshr_n_u8 (uint8x8_t __a, const int __b)
{
  return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vshr_n_u16 (uint16x4_t __a, const int __b)
{
  return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vshr_n_u32 (uint32x2_t __a, const int __b)
{
  return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vshr_n_u64 (uint64x1_t __a, const int __b)
{
  return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vshrq_n_s8 (int8x16_t __a, const int __b)
{
  return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vshrq_n_s16 (int16x8_t __a, const int __b)
{
  return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vshrq_n_s32 (int32x4_t __a, const int __b)
{
  return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vshrq_n_s64 (int64x2_t __a, const int __b)
{
  return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vshrq_n_u8 (uint8x16_t __a, const int __b)
{
  return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vshrq_n_u16 (uint16x8_t __a, const int __b)
{
  return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vshrq_n_u32 (uint32x4_t __a, const int __b)
{
  return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vshrq_n_u64 (uint64x2_t __a, const int __b)
{
  return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
}

/* Scalar (d-register) forms.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vshrd_n_s64 (int64_t __a, const int __b)
{
  return __builtin_aarch64_ashr_simddi (__a, __b);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vshrd_n_u64 (uint64_t __a, const int __b)
{
  return __builtin_aarch64_lshr_simddi_uus (__a, __b);
}
/* vsli — shift left by immediate and insert (SLI): __b is shifted left
   by __c and the shifted-in bit positions keep the original bits of
   __a.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
{
  return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
{
  return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
{
  return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
}

/* Single-element variant: operate on lane 0 via the DI scalar builtin.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
{
  return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
{
  return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
{
  return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
{
  return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
{
  return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
{
  return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
{
  return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c);
}

/* Scalar (d-register) forms.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vslid_n_s64 (int64_t __a, int64_t __b, const int __c)
{
  return __builtin_aarch64_ssli_ndi (__a, __b, __c);
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
{
  return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c);
}
22183 /* vsqadd */
22185 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22186 vsqadd_u8 (uint8x8_t __a, int8x8_t __b)
22188 return __builtin_aarch64_usqaddv8qi_uus (__a, __b);
22191 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22192 vsqadd_u16 (uint16x4_t __a, int16x4_t __b)
22194 return __builtin_aarch64_usqaddv4hi_uus (__a, __b);
22197 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22198 vsqadd_u32 (uint32x2_t __a, int32x2_t __b)
22200 return __builtin_aarch64_usqaddv2si_uus (__a, __b);
22203 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22204 vsqadd_u64 (uint64x1_t __a, int64x1_t __b)
22206 return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])};
22209 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22210 vsqaddq_u8 (uint8x16_t __a, int8x16_t __b)
22212 return __builtin_aarch64_usqaddv16qi_uus (__a, __b);
22215 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22216 vsqaddq_u16 (uint16x8_t __a, int16x8_t __b)
22218 return __builtin_aarch64_usqaddv8hi_uus (__a, __b);
22221 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22222 vsqaddq_u32 (uint32x4_t __a, int32x4_t __b)
22224 return __builtin_aarch64_usqaddv4si_uus (__a, __b);
22227 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22228 vsqaddq_u64 (uint64x2_t __a, int64x2_t __b)
22230 return __builtin_aarch64_usqaddv2di_uus (__a, __b);
22233 __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
22234 vsqaddb_u8 (uint8x1_t __a, int8x1_t __b)
22236 return __builtin_aarch64_usqaddqi_uus (__a, __b);
22239 __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
22240 vsqaddh_u16 (uint16x1_t __a, int16x1_t __b)
22242 return __builtin_aarch64_usqaddhi_uus (__a, __b);
22245 __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
22246 vsqadds_u32 (uint32x1_t __a, int32x1_t __b)
22248 return __builtin_aarch64_usqaddsi_uus (__a, __b);
22251 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22252 vsqaddd_u64 (uint64_t __a, int64_t __b)
22254 return __builtin_aarch64_usqadddi_uus (__a, __b);
22257 /* vsqrt */
22258 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
22259 vsqrt_f32 (float32x2_t a)
22261 return __builtin_aarch64_sqrtv2sf (a);
22264 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
22265 vsqrtq_f32 (float32x4_t a)
22267 return __builtin_aarch64_sqrtv4sf (a);
22270 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
22271 vsqrtq_f64 (float64x2_t a)
22273 return __builtin_aarch64_sqrtv2df (a);
22276 /* vsra */
22278 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22279 vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22281 return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
22284 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22285 vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22287 return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
22290 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22291 vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22293 return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
22296 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22297 vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22299 return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
22302 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22303 vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22305 return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
22308 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22309 vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22311 return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
22314 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22315 vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22317 return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
22320 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22321 vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22323 return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
22326 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22327 vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22329 return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
22332 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22333 vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22335 return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
22338 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22339 vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22341 return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
22344 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22345 vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22347 return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
22350 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22351 vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22353 return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
22356 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22357 vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22359 return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
22362 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22363 vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22365 return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
22368 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22369 vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22371 return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
22374 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22375 vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
22377 return __builtin_aarch64_ssra_ndi (__a, __b, __c);
22380 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22381 vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22383 return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
22386 /* vsri */
22388 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
22389 vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
22391 return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
22394 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
22395 vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
22397 return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
22400 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
22401 vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
22403 return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
22406 __extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
22407 vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
22409 return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
22412 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
22413 vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
22415 return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
22418 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
22419 vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
22421 return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
22424 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
22425 vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
22427 return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
22430 __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
22431 vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
22433 return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
22436 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
22437 vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
22439 return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
22442 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
22443 vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
22445 return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
22448 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
22449 vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
22451 return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
22454 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
22455 vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
22457 return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
22460 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
22461 vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
22463 return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
22466 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
22467 vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
22469 return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
22472 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
22473 vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
22475 return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
22478 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
22479 vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
22481 return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
22484 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
22485 vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
22487 return __builtin_aarch64_ssri_ndi (__a, __b, __c);
22490 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
22491 vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
22493 return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
22496 /* vst1 */
22498 __extension__ static __inline void __attribute__ ((__always_inline__))
22499 vst1_f32 (float32_t *a, float32x2_t b)
22501 __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
22504 __extension__ static __inline void __attribute__ ((__always_inline__))
22505 vst1_f64 (float64_t *a, float64x1_t b)
22507 *a = b[0];
22510 __extension__ static __inline void __attribute__ ((__always_inline__))
22511 vst1_p8 (poly8_t *a, poly8x8_t b)
22513 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22514 (int8x8_t) b);
22517 __extension__ static __inline void __attribute__ ((__always_inline__))
22518 vst1_p16 (poly16_t *a, poly16x4_t b)
22520 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22521 (int16x4_t) b);
22524 __extension__ static __inline void __attribute__ ((__always_inline__))
22525 vst1_s8 (int8_t *a, int8x8_t b)
22527 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
22530 __extension__ static __inline void __attribute__ ((__always_inline__))
22531 vst1_s16 (int16_t *a, int16x4_t b)
22533 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
22536 __extension__ static __inline void __attribute__ ((__always_inline__))
22537 vst1_s32 (int32_t *a, int32x2_t b)
22539 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
22542 __extension__ static __inline void __attribute__ ((__always_inline__))
22543 vst1_s64 (int64_t *a, int64x1_t b)
22545 *a = b[0];
22548 __extension__ static __inline void __attribute__ ((__always_inline__))
22549 vst1_u8 (uint8_t *a, uint8x8_t b)
22551 __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
22552 (int8x8_t) b);
22555 __extension__ static __inline void __attribute__ ((__always_inline__))
22556 vst1_u16 (uint16_t *a, uint16x4_t b)
22558 __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
22559 (int16x4_t) b);
22562 __extension__ static __inline void __attribute__ ((__always_inline__))
22563 vst1_u32 (uint32_t *a, uint32x2_t b)
22565 __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
22566 (int32x2_t) b);
22569 __extension__ static __inline void __attribute__ ((__always_inline__))
22570 vst1_u64 (uint64_t *a, uint64x1_t b)
22572 *a = b[0];
22575 __extension__ static __inline void __attribute__ ((__always_inline__))
22576 vst1q_f32 (float32_t *a, float32x4_t b)
22578 __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
22581 __extension__ static __inline void __attribute__ ((__always_inline__))
22582 vst1q_f64 (float64_t *a, float64x2_t b)
22584 __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
22587 /* vst1q */
22589 __extension__ static __inline void __attribute__ ((__always_inline__))
22590 vst1q_p8 (poly8_t *a, poly8x16_t b)
22592 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22593 (int8x16_t) b);
22596 __extension__ static __inline void __attribute__ ((__always_inline__))
22597 vst1q_p16 (poly16_t *a, poly16x8_t b)
22599 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22600 (int16x8_t) b);
22603 __extension__ static __inline void __attribute__ ((__always_inline__))
22604 vst1q_s8 (int8_t *a, int8x16_t b)
22606 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
22609 __extension__ static __inline void __attribute__ ((__always_inline__))
22610 vst1q_s16 (int16_t *a, int16x8_t b)
22612 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
22615 __extension__ static __inline void __attribute__ ((__always_inline__))
22616 vst1q_s32 (int32_t *a, int32x4_t b)
22618 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
22621 __extension__ static __inline void __attribute__ ((__always_inline__))
22622 vst1q_s64 (int64_t *a, int64x2_t b)
22624 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
22627 __extension__ static __inline void __attribute__ ((__always_inline__))
22628 vst1q_u8 (uint8_t *a, uint8x16_t b)
22630 __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
22631 (int8x16_t) b);
22634 __extension__ static __inline void __attribute__ ((__always_inline__))
22635 vst1q_u16 (uint16_t *a, uint16x8_t b)
22637 __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
22638 (int16x8_t) b);
22641 __extension__ static __inline void __attribute__ ((__always_inline__))
22642 vst1q_u32 (uint32_t *a, uint32x4_t b)
22644 __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
22645 (int32x4_t) b);
22648 __extension__ static __inline void __attribute__ ((__always_inline__))
22649 vst1q_u64 (uint64_t *a, uint64x2_t b)
22651 __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
22652 (int64x2_t) b);
22655 /* vstn */
22657 __extension__ static __inline void
22658 vst2_s64 (int64_t * __a, int64x1x2_t val)
22660 __builtin_aarch64_simd_oi __o;
22661 int64x2x2_t temp;
22662 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22663 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22664 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22665 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22666 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22669 __extension__ static __inline void
22670 vst2_u64 (uint64_t * __a, uint64x1x2_t val)
22672 __builtin_aarch64_simd_oi __o;
22673 uint64x2x2_t temp;
22674 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22675 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22676 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
22677 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
22678 __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
22681 __extension__ static __inline void
22682 vst2_f64 (float64_t * __a, float64x1x2_t val)
22684 __builtin_aarch64_simd_oi __o;
22685 float64x2x2_t temp;
22686 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22687 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22688 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
22689 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
22690 __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
22693 __extension__ static __inline void
22694 vst2_s8 (int8_t * __a, int8x8x2_t val)
22696 __builtin_aarch64_simd_oi __o;
22697 int8x16x2_t temp;
22698 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22699 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22700 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22701 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22702 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22705 __extension__ static __inline void __attribute__ ((__always_inline__))
22706 vst2_p8 (poly8_t * __a, poly8x8x2_t val)
22708 __builtin_aarch64_simd_oi __o;
22709 poly8x16x2_t temp;
22710 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22711 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22712 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22713 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22714 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22717 __extension__ static __inline void __attribute__ ((__always_inline__))
22718 vst2_s16 (int16_t * __a, int16x4x2_t val)
22720 __builtin_aarch64_simd_oi __o;
22721 int16x8x2_t temp;
22722 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22723 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22724 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22725 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22726 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22729 __extension__ static __inline void __attribute__ ((__always_inline__))
22730 vst2_p16 (poly16_t * __a, poly16x4x2_t val)
22732 __builtin_aarch64_simd_oi __o;
22733 poly16x8x2_t temp;
22734 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22735 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
22736 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22737 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22738 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22741 __extension__ static __inline void __attribute__ ((__always_inline__))
22742 vst2_s32 (int32_t * __a, int32x2x2_t val)
22744 __builtin_aarch64_simd_oi __o;
22745 int32x4x2_t temp;
22746 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
22747 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
22748 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22749 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22750 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22753 __extension__ static __inline void __attribute__ ((__always_inline__))
22754 vst2_u8 (uint8_t * __a, uint8x8x2_t val)
22756 __builtin_aarch64_simd_oi __o;
22757 uint8x16x2_t temp;
22758 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
22759 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
22760 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
22761 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
22762 __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
22765 __extension__ static __inline void __attribute__ ((__always_inline__))
22766 vst2_u16 (uint16_t * __a, uint16x4x2_t val)
22768 __builtin_aarch64_simd_oi __o;
22769 uint16x8x2_t temp;
22770 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
22771 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
22772 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
22773 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
22774 __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
22777 __extension__ static __inline void __attribute__ ((__always_inline__))
22778 vst2_u32 (uint32_t * __a, uint32x2x2_t val)
22780 __builtin_aarch64_simd_oi __o;
22781 uint32x4x2_t temp;
22782 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
22783 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
22784 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
22785 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
22786 __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
22789 __extension__ static __inline void __attribute__ ((__always_inline__))
22790 vst2_f32 (float32_t * __a, float32x2x2_t val)
22792 __builtin_aarch64_simd_oi __o;
22793 float32x4x2_t temp;
22794 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
22795 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
22796 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
22797 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
22798 __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
22801 __extension__ static __inline void __attribute__ ((__always_inline__))
22802 vst2q_s8 (int8_t * __a, int8x16x2_t val)
22804 __builtin_aarch64_simd_oi __o;
22805 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22806 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22807 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22810 __extension__ static __inline void __attribute__ ((__always_inline__))
22811 vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
22813 __builtin_aarch64_simd_oi __o;
22814 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22815 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22816 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22819 __extension__ static __inline void __attribute__ ((__always_inline__))
22820 vst2q_s16 (int16_t * __a, int16x8x2_t val)
22822 __builtin_aarch64_simd_oi __o;
22823 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22824 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22825 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22828 __extension__ static __inline void __attribute__ ((__always_inline__))
22829 vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
22831 __builtin_aarch64_simd_oi __o;
22832 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22833 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22834 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22837 __extension__ static __inline void __attribute__ ((__always_inline__))
22838 vst2q_s32 (int32_t * __a, int32x4x2_t val)
22840 __builtin_aarch64_simd_oi __o;
22841 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22842 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22843 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22846 __extension__ static __inline void __attribute__ ((__always_inline__))
22847 vst2q_s64 (int64_t * __a, int64x2x2_t val)
22849 __builtin_aarch64_simd_oi __o;
22850 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22851 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22852 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22855 __extension__ static __inline void __attribute__ ((__always_inline__))
22856 vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
22858 __builtin_aarch64_simd_oi __o;
22859 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
22860 __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
22861 __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
22864 __extension__ static __inline void __attribute__ ((__always_inline__))
22865 vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
22867 __builtin_aarch64_simd_oi __o;
22868 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
22869 __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
22870 __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
22873 __extension__ static __inline void __attribute__ ((__always_inline__))
22874 vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
22876 __builtin_aarch64_simd_oi __o;
22877 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
22878 __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
22879 __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
22882 __extension__ static __inline void __attribute__ ((__always_inline__))
22883 vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
22885 __builtin_aarch64_simd_oi __o;
22886 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
22887 __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
22888 __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
22891 __extension__ static __inline void __attribute__ ((__always_inline__))
22892 vst2q_f32 (float32_t * __a, float32x4x2_t val)
22894 __builtin_aarch64_simd_oi __o;
22895 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
22896 __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
22897 __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
22900 __extension__ static __inline void __attribute__ ((__always_inline__))
22901 vst2q_f64 (float64_t * __a, float64x2x2_t val)
22903 __builtin_aarch64_simd_oi __o;
22904 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
22905 __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
22906 __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
22909 __extension__ static __inline void
22910 vst3_s64 (int64_t * __a, int64x1x3_t val)
22912 __builtin_aarch64_simd_ci __o;
22913 int64x2x3_t temp;
22914 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
22915 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
22916 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
22917 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22918 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22919 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22920 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
22923 __extension__ static __inline void
22924 vst3_u64 (uint64_t * __a, uint64x1x3_t val)
22926 __builtin_aarch64_simd_ci __o;
22927 uint64x2x3_t temp;
22928 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
22929 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
22930 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
22931 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
22932 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
22933 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
22934 __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
/* vst3 (64-bit D-register variants): store three short vectors to memory
   with 3-way element interleaving (maps to the AArch64 ST3 instruction).
   Common pattern for every variant below:
     1. Widen each 64-bit input vector to 128 bits by combining it with a
        zero upper half (vcombine_* with vcreate_*(0)).
     2. Pack the three widened vectors, lane by lane, into the opaque
        builtin 3-register tuple type __builtin_aarch64_simd_ci via
        __builtin_aarch64_set_qregci*.
     3. Invoke the matching __builtin_aarch64_st3* store builtin.
   NOTE(review): __o is passed to the first set_qregci* call before any
   lane has been written — presumably the builtin treats unset lanes as
   don't-care; confirm against the builtin's definition.
   NOTE(review): vst3_f64 and vst3_s8 lack the
   __attribute__ ((__always_inline__)) that all sibling variants carry;
   looks like an oversight — confirm and make consistent.  */
22937 __extension__ static __inline void
22938 vst3_f64 (float64_t * __a, float64x1x3_t val)
22940 __builtin_aarch64_simd_ci __o;
22941 float64x2x3_t temp;
22942 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
22943 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
22944 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
22945 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
22946 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
22947 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
22948 __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
/* Signed 8-bit variant.  */
22951 __extension__ static __inline void
22952 vst3_s8 (int8_t * __a, int8x8x3_t val)
22954 __builtin_aarch64_simd_ci __o;
22955 int8x16x3_t temp;
22956 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
22957 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
22958 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
22959 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22960 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22961 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22962 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Polynomial 8-bit variant; reuses the int8x16 register-packing builtins
   via casts (the builtin interface is typed on the int vector modes).  */
22965 __extension__ static __inline void __attribute__ ((__always_inline__))
22966 vst3_p8 (poly8_t * __a, poly8x8x3_t val)
22968 __builtin_aarch64_simd_ci __o;
22969 poly8x16x3_t temp;
22970 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
22971 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
22972 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
22973 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
22974 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
22975 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
22976 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Signed 16-bit variant.  */
22979 __extension__ static __inline void __attribute__ ((__always_inline__))
22980 vst3_s16 (int16_t * __a, int16x4x3_t val)
22982 __builtin_aarch64_simd_ci __o;
22983 int16x8x3_t temp;
22984 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
22985 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
22986 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
22987 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
22988 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
22989 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
22990 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Polynomial 16-bit variant.  */
22993 __extension__ static __inline void __attribute__ ((__always_inline__))
22994 vst3_p16 (poly16_t * __a, poly16x4x3_t val)
22996 __builtin_aarch64_simd_ci __o;
22997 poly16x8x3_t temp;
22998 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
22999 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23000 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23001 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23002 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23003 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23004 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Signed 32-bit variant.  */
23007 __extension__ static __inline void __attribute__ ((__always_inline__))
23008 vst3_s32 (int32_t * __a, int32x2x3_t val)
23010 __builtin_aarch64_simd_ci __o;
23011 int32x4x3_t temp;
23012 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23013 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23014 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23015 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23016 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23017 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23018 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
/* Unsigned 8-bit variant.  */
23021 __extension__ static __inline void __attribute__ ((__always_inline__))
23022 vst3_u8 (uint8_t * __a, uint8x8x3_t val)
23024 __builtin_aarch64_simd_ci __o;
23025 uint8x16x3_t temp;
23026 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23027 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23028 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23029 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
23030 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
23031 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
23032 __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Unsigned 16-bit variant.  */
23035 __extension__ static __inline void __attribute__ ((__always_inline__))
23036 vst3_u16 (uint16_t * __a, uint16x4x3_t val)
23038 __builtin_aarch64_simd_ci __o;
23039 uint16x8x3_t temp;
23040 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23041 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23042 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23043 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
23044 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
23045 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
23046 __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Unsigned 32-bit variant.  */
23049 __extension__ static __inline void __attribute__ ((__always_inline__))
23050 vst3_u32 (uint32_t * __a, uint32x2x3_t val)
23052 __builtin_aarch64_simd_ci __o;
23053 uint32x4x3_t temp;
23054 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23055 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23056 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23057 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
23058 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
23059 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
23060 __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
/* Single-precision float variant.  */
23063 __extension__ static __inline void __attribute__ ((__always_inline__))
23064 vst3_f32 (float32_t * __a, float32x2x3_t val)
23066 __builtin_aarch64_simd_ci __o;
23067 float32x4x3_t temp;
23068 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23069 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23070 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23071 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
23072 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
23073 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
23074 __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
/* vst3q (128-bit Q-register variants): store three full-width vectors
   with 3-way element interleaving (AArch64 ST3).  Unlike the vst3_*
   D-register forms, no widening is needed: each input is packed directly
   into the opaque 3-register tuple (__builtin_aarch64_simd_ci) and handed
   to the matching store builtin.
   NOTE(review): __o is passed to the first set_qregci* call before any
   lane has been written — presumably intentional for the opaque tuple
   type; confirm against the builtin's definition.  */
23077 __extension__ static __inline void __attribute__ ((__always_inline__))
23078 vst3q_s8 (int8_t * __a, int8x16x3_t val)
23080 __builtin_aarch64_simd_ci __o;
23081 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23082 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23083 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23084 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Polynomial 8-bit variant.  */
23087 __extension__ static __inline void __attribute__ ((__always_inline__))
23088 vst3q_p8 (poly8_t * __a, poly8x16x3_t val)
23090 __builtin_aarch64_simd_ci __o;
23091 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23092 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23093 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23094 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Signed 16-bit variant.  */
23097 __extension__ static __inline void __attribute__ ((__always_inline__))
23098 vst3q_s16 (int16_t * __a, int16x8x3_t val)
23100 __builtin_aarch64_simd_ci __o;
23101 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23102 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23103 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23104 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Polynomial 16-bit variant.  */
23107 __extension__ static __inline void __attribute__ ((__always_inline__))
23108 vst3q_p16 (poly16_t * __a, poly16x8x3_t val)
23110 __builtin_aarch64_simd_ci __o;
23111 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23112 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23113 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23114 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Signed 32-bit variant.  */
23117 __extension__ static __inline void __attribute__ ((__always_inline__))
23118 vst3q_s32 (int32_t * __a, int32x4x3_t val)
23120 __builtin_aarch64_simd_ci __o;
23121 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23122 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23123 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23124 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
/* Signed 64-bit variant.  */
23127 __extension__ static __inline void __attribute__ ((__always_inline__))
23128 vst3q_s64 (int64_t * __a, int64x2x3_t val)
23130 __builtin_aarch64_simd_ci __o;
23131 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23132 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23133 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23134 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
/* Unsigned 8-bit variant.  */
23137 __extension__ static __inline void __attribute__ ((__always_inline__))
23138 vst3q_u8 (uint8_t * __a, uint8x16x3_t val)
23140 __builtin_aarch64_simd_ci __o;
23141 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0);
23142 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1);
23143 __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2);
23144 __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Unsigned 16-bit variant.  */
23147 __extension__ static __inline void __attribute__ ((__always_inline__))
23148 vst3q_u16 (uint16_t * __a, uint16x8x3_t val)
23150 __builtin_aarch64_simd_ci __o;
23151 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0);
23152 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1);
23153 __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2);
23154 __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Unsigned 32-bit variant.  */
23157 __extension__ static __inline void __attribute__ ((__always_inline__))
23158 vst3q_u32 (uint32_t * __a, uint32x4x3_t val)
23160 __builtin_aarch64_simd_ci __o;
23161 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0);
23162 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1);
23163 __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2);
23164 __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o);
/* Unsigned 64-bit variant.  */
23167 __extension__ static __inline void __attribute__ ((__always_inline__))
23168 vst3q_u64 (uint64_t * __a, uint64x2x3_t val)
23170 __builtin_aarch64_simd_ci __o;
23171 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0);
23172 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1);
23173 __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2);
23174 __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o);
/* Single-precision float variant.  */
23177 __extension__ static __inline void __attribute__ ((__always_inline__))
23178 vst3q_f32 (float32_t * __a, float32x4x3_t val)
23180 __builtin_aarch64_simd_ci __o;
23181 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0);
23182 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1);
23183 __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2);
23184 __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
/* Double-precision float variant.  */
23187 __extension__ static __inline void __attribute__ ((__always_inline__))
23188 vst3q_f64 (float64_t * __a, float64x2x3_t val)
23190 __builtin_aarch64_simd_ci __o;
23191 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0);
23192 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1);
23193 __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2);
23194 __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o);
/* vst4 (64-bit D-register variants): store four short vectors to memory
   with 4-way element interleaving (AArch64 ST4).  Same pattern as vst3_*
   but with a fourth element and the 4-register opaque tuple type
   __builtin_aarch64_simd_xi packed via __builtin_aarch64_set_qregxi*.
   NOTE(review): __o is passed to the first set_qregxi* call before any
   lane has been written — presumably the builtin treats unset lanes as
   don't-care; confirm against the builtin's definition.
   NOTE(review): vst4_s64, vst4_u64, vst4_f64 and vst4_s8 lack the
   __attribute__ ((__always_inline__)) carried by the sibling variants;
   looks like an oversight — confirm and make consistent.  */
23197 __extension__ static __inline void
23198 vst4_s64 (int64_t * __a, int64x1x4_t val)
23200 __builtin_aarch64_simd_xi __o;
23201 int64x2x4_t temp;
23202 temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
23203 temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
23204 temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
23205 temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0)));
23206 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23207 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23208 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23209 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23210 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
/* Unsigned 64-bit variant.  */
23213 __extension__ static __inline void
23214 vst4_u64 (uint64_t * __a, uint64x1x4_t val)
23216 __builtin_aarch64_simd_xi __o;
23217 uint64x2x4_t temp;
23218 temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
23219 temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
23220 temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
23221 temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0)));
23222 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0);
23223 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1);
23224 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2);
23225 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3);
23226 __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o);
/* Double-precision float variant.  */
23229 __extension__ static __inline void
23230 vst4_f64 (float64_t * __a, float64x1x4_t val)
23232 __builtin_aarch64_simd_xi __o;
23233 float64x2x4_t temp;
23234 temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
23235 temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
23236 temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
23237 temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0)));
23238 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0);
23239 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1);
23240 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2);
23241 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3);
23242 __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o);
/* Signed 8-bit variant.  */
23245 __extension__ static __inline void
23246 vst4_s8 (int8_t * __a, int8x8x4_t val)
23248 __builtin_aarch64_simd_xi __o;
23249 int8x16x4_t temp;
23250 temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
23251 temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
23252 temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
23253 temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0)));
23254 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23255 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23256 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23257 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23258 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Polynomial 8-bit variant.  */
23261 __extension__ static __inline void __attribute__ ((__always_inline__))
23262 vst4_p8 (poly8_t * __a, poly8x8x4_t val)
23264 __builtin_aarch64_simd_xi __o;
23265 poly8x16x4_t temp;
23266 temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
23267 temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
23268 temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
23269 temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0)));
23270 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23271 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23272 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23273 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23274 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Signed 16-bit variant.  */
23277 __extension__ static __inline void __attribute__ ((__always_inline__))
23278 vst4_s16 (int16_t * __a, int16x4x4_t val)
23280 __builtin_aarch64_simd_xi __o;
23281 int16x8x4_t temp;
23282 temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
23283 temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
23284 temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
23285 temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0)));
23286 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23287 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23288 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23289 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23290 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Polynomial 16-bit variant.  */
23293 __extension__ static __inline void __attribute__ ((__always_inline__))
23294 vst4_p16 (poly16_t * __a, poly16x4x4_t val)
23296 __builtin_aarch64_simd_xi __o;
23297 poly16x8x4_t temp;
23298 temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
23299 temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
23300 temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
23301 temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0)));
23302 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23303 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23304 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23305 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23306 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Signed 32-bit variant.  */
23309 __extension__ static __inline void __attribute__ ((__always_inline__))
23310 vst4_s32 (int32_t * __a, int32x2x4_t val)
23312 __builtin_aarch64_simd_xi __o;
23313 int32x4x4_t temp;
23314 temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
23315 temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
23316 temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
23317 temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0)));
23318 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23319 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23320 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23321 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23322 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
/* Unsigned 8-bit variant.  */
23325 __extension__ static __inline void __attribute__ ((__always_inline__))
23326 vst4_u8 (uint8_t * __a, uint8x8x4_t val)
23328 __builtin_aarch64_simd_xi __o;
23329 uint8x16x4_t temp;
23330 temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
23331 temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
23332 temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
23333 temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0)));
23334 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0);
23335 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1);
23336 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2);
23337 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3);
23338 __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Unsigned 16-bit variant.  */
23341 __extension__ static __inline void __attribute__ ((__always_inline__))
23342 vst4_u16 (uint16_t * __a, uint16x4x4_t val)
23344 __builtin_aarch64_simd_xi __o;
23345 uint16x8x4_t temp;
23346 temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
23347 temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
23348 temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
23349 temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0)));
23350 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0);
23351 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1);
23352 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2);
23353 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3);
23354 __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Unsigned 32-bit variant.  */
23357 __extension__ static __inline void __attribute__ ((__always_inline__))
23358 vst4_u32 (uint32_t * __a, uint32x2x4_t val)
23360 __builtin_aarch64_simd_xi __o;
23361 uint32x4x4_t temp;
23362 temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
23363 temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
23364 temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
23365 temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0)));
23366 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0);
23367 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1);
23368 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2);
23369 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3);
23370 __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o);
/* Single-precision float variant.  */
23373 __extension__ static __inline void __attribute__ ((__always_inline__))
23374 vst4_f32 (float32_t * __a, float32x2x4_t val)
23376 __builtin_aarch64_simd_xi __o;
23377 float32x4x4_t temp;
23378 temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
23379 temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
23380 temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
23381 temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0)));
23382 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0);
23383 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1);
23384 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2);
23385 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3);
23386 __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
/* vst4q (128-bit Q-register variants): store four full-width vectors
   with 4-way element interleaving (AArch64 ST4).  No widening step is
   needed; each input is packed directly into the opaque 4-register tuple
   (__builtin_aarch64_simd_xi) and handed to the matching store builtin.
   NOTE(review): __o is passed to the first set_qregxi* call before any
   lane has been written — presumably intentional for the opaque tuple
   type; confirm against the builtin's definition.  */
23389 __extension__ static __inline void __attribute__ ((__always_inline__))
23390 vst4q_s8 (int8_t * __a, int8x16x4_t val)
23392 __builtin_aarch64_simd_xi __o;
23393 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23394 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23395 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23396 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23397 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Polynomial 8-bit variant.  */
23400 __extension__ static __inline void __attribute__ ((__always_inline__))
23401 vst4q_p8 (poly8_t * __a, poly8x16x4_t val)
23403 __builtin_aarch64_simd_xi __o;
23404 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23405 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23406 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23407 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23408 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Signed 16-bit variant.  */
23411 __extension__ static __inline void __attribute__ ((__always_inline__))
23412 vst4q_s16 (int16_t * __a, int16x8x4_t val)
23414 __builtin_aarch64_simd_xi __o;
23415 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23416 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23417 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23418 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23419 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Polynomial 16-bit variant.  */
23422 __extension__ static __inline void __attribute__ ((__always_inline__))
23423 vst4q_p16 (poly16_t * __a, poly16x8x4_t val)
23425 __builtin_aarch64_simd_xi __o;
23426 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23427 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23428 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23429 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23430 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Signed 32-bit variant.  */
23433 __extension__ static __inline void __attribute__ ((__always_inline__))
23434 vst4q_s32 (int32_t * __a, int32x4x4_t val)
23436 __builtin_aarch64_simd_xi __o;
23437 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23438 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23439 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23440 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23441 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
/* Signed 64-bit variant.  */
23444 __extension__ static __inline void __attribute__ ((__always_inline__))
23445 vst4q_s64 (int64_t * __a, int64x2x4_t val)
23447 __builtin_aarch64_simd_xi __o;
23448 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23449 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23450 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23451 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23452 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
/* Unsigned 8-bit variant.  */
23455 __extension__ static __inline void __attribute__ ((__always_inline__))
23456 vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
23458 __builtin_aarch64_simd_xi __o;
23459 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
23460 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
23461 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
23462 __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
23463 __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
/* Unsigned 16-bit variant.  */
23466 __extension__ static __inline void __attribute__ ((__always_inline__))
23467 vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
23469 __builtin_aarch64_simd_xi __o;
23470 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
23471 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
23472 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
23473 __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
23474 __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
/* Unsigned 32-bit variant.  */
23477 __extension__ static __inline void __attribute__ ((__always_inline__))
23478 vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
23480 __builtin_aarch64_simd_xi __o;
23481 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
23482 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
23483 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
23484 __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
23485 __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
/* Unsigned 64-bit variant.  */
23488 __extension__ static __inline void __attribute__ ((__always_inline__))
23489 vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
23491 __builtin_aarch64_simd_xi __o;
23492 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
23493 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
23494 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
23495 __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
23496 __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
/* Single-precision float variant.  */
23499 __extension__ static __inline void __attribute__ ((__always_inline__))
23500 vst4q_f32 (float32_t * __a, float32x4x4_t val)
23502 __builtin_aarch64_simd_xi __o;
23503 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0);
23504 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1);
23505 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2);
23506 __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3);
23507 __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
/* Double-precision float variant.  */
23510 __extension__ static __inline void __attribute__ ((__always_inline__))
23511 vst4q_f64 (float64_t * __a, float64x2x4_t val)
23513 __builtin_aarch64_simd_xi __o;
23514 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0);
23515 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1);
23516 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2);
23517 __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3);
23518 __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o);
23521 /* vsub */
/* vsubd_s64: scalar subtract of two signed 64-bit values, returning
   __a - __b (the scalar form of the SUB instruction's D-register
   operation).  Plain C subtraction; no builtin is needed.  */
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vsubd_s64 (int64_t __a, int64_t __b)
{
  return __a - __b;
}
/* vsubd_u64: scalar subtract of two unsigned 64-bit values, returning
   __a - __b with the usual modulo-2^64 wraparound of unsigned C
   arithmetic.  Plain C subtraction; no builtin is needed.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vsubd_u64 (uint64_t __a, uint64_t __b)
{
  return __a - __b;
}
23535 /* vtbx1 */
/* vtbx1_{s8,u8,p8}: one-register table lookup with fallback.  For each
   lane, if the index is < 8 (in range for the single 8-byte table) the
   result is the vtbl1 lookup; otherwise the corresponding lane of __r is
   kept.  Implemented as: build a per-lane mask with vclt_u8, do the
   lookup, then select with vbsl.  */
23537 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23538 vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx)
/* Indices are compared as unsigned, so negative signed indices are
   treated as large values and fall back to __r.  */
23540 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
23541 vmov_n_u8 (8));
23542 int8x8_t __tbl = vtbl1_s8 (__tab, __idx);
23544 return vbsl_s8 (__mask, __tbl, __r);
/* Unsigned 8-bit variant.  */
23547 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23548 vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx)
23550 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
23551 uint8x8_t __tbl = vtbl1_u8 (__tab, __idx);
23553 return vbsl_u8 (__mask, __tbl, __r);
/* Polynomial 8-bit variant (indices are plain unsigned bytes).  */
23556 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23557 vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx)
23559 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8));
23560 poly8x8_t __tbl = vtbl1_p8 (__tab, __idx);
23562 return vbsl_p8 (__mask, __tbl, __r);
23565 /* vtbx3 */
/* vtbx3_{s8,u8,p8}: three-register table lookup with fallback.  Same
   scheme as vtbx1 but the table spans 3 x 8 = 24 bytes, so the in-range
   test is index < 24; out-of-range lanes keep the corresponding lane of
   __r via the vbsl select.  */
23567 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23568 vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx)
/* Indices are compared as unsigned, so negative signed indices fall
   back to __r.  */
23570 uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx),
23571 vmov_n_u8 (24));
23572 int8x8_t __tbl = vtbl3_s8 (__tab, __idx);
23574 return vbsl_s8 (__mask, __tbl, __r);
/* Unsigned 8-bit variant.  */
23577 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23578 vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx)
23580 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
23581 uint8x8_t __tbl = vtbl3_u8 (__tab, __idx);
23583 return vbsl_u8 (__mask, __tbl, __r);
/* Polynomial 8-bit variant.  */
23586 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23587 vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx)
23589 uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24));
23590 poly8x8_t __tbl = vtbl3_p8 (__tab, __idx);
23592 return vbsl_p8 (__mask, __tbl, __r);
23595 /* vtrn */
/* vtrn1 (64-bit D-register variants): transpose-interleave the
   even-indexed elements of __a and __b (TRN1 instruction), expressed as
   a generic __builtin_shuffle.  Each variant carries two shuffle masks:
   the big-endian (__AARCH64EB__) mask mirrors the little-endian one
   because GCC's vector lane numbering is reversed on big-endian
   AArch64.  */
23597 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
23598 vtrn1_f32 (float32x2_t __a, float32x2_t __b)
23600 #ifdef __AARCH64EB__
23601 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23602 #else
23603 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23604 #endif
/* Polynomial 8-bit variant.  */
23607 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
23608 vtrn1_p8 (poly8x8_t __a, poly8x8_t __b)
23610 #ifdef __AARCH64EB__
23611 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23612 #else
23613 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23614 #endif
/* Polynomial 16-bit variant.  */
23617 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
23618 vtrn1_p16 (poly16x4_t __a, poly16x4_t __b)
23620 #ifdef __AARCH64EB__
23621 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23622 #else
23623 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23624 #endif
/* Signed 8-bit variant.  */
23627 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
23628 vtrn1_s8 (int8x8_t __a, int8x8_t __b)
23630 #ifdef __AARCH64EB__
23631 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23632 #else
23633 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23634 #endif
/* Signed 16-bit variant.  */
23637 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
23638 vtrn1_s16 (int16x4_t __a, int16x4_t __b)
23640 #ifdef __AARCH64EB__
23641 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23642 #else
23643 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23644 #endif
/* Signed 32-bit variant.  */
23647 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
23648 vtrn1_s32 (int32x2_t __a, int32x2_t __b)
23650 #ifdef __AARCH64EB__
23651 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23652 #else
23653 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23654 #endif
/* Unsigned 8-bit variant.  */
23657 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
23658 vtrn1_u8 (uint8x8_t __a, uint8x8_t __b)
23660 #ifdef __AARCH64EB__
23661 return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23662 #else
23663 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23664 #endif
/* Unsigned 16-bit variant.  */
23667 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
23668 vtrn1_u16 (uint16x4_t __a, uint16x4_t __b)
23670 #ifdef __AARCH64EB__
23671 return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3});
23672 #else
23673 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6});
23674 #endif
/* Unsigned 32-bit variant.  */
23677 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
23678 vtrn1_u32 (uint32x2_t __a, uint32x2_t __b)
23680 #ifdef __AARCH64EB__
23681 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
23682 #else
23683 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
23684 #endif
23687 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
23688 vtrn1q_f32 (float32x4_t __a, float32x4_t __b)
23690 #ifdef __AARCH64EB__
23691 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23692 #else
23693 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23694 #endif
23697 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
23698 vtrn1q_f64 (float64x2_t __a, float64x2_t __b)
23700 #ifdef __AARCH64EB__
23701 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23702 #else
23703 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23704 #endif
23707 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
23708 vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b)
23710 #ifdef __AARCH64EB__
23711 return __builtin_shuffle (__a, __b,
23712 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23713 #else
23714 return __builtin_shuffle (__a, __b,
23715 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23716 #endif
23719 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
23720 vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b)
23722 #ifdef __AARCH64EB__
23723 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23724 #else
23725 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23726 #endif
23729 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
23730 vtrn1q_s8 (int8x16_t __a, int8x16_t __b)
23732 #ifdef __AARCH64EB__
23733 return __builtin_shuffle (__a, __b,
23734 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23735 #else
23736 return __builtin_shuffle (__a, __b,
23737 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23738 #endif
23741 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
23742 vtrn1q_s16 (int16x8_t __a, int16x8_t __b)
23744 #ifdef __AARCH64EB__
23745 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23746 #else
23747 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23748 #endif
23751 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
23752 vtrn1q_s32 (int32x4_t __a, int32x4_t __b)
23754 #ifdef __AARCH64EB__
23755 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23756 #else
23757 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23758 #endif
23761 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
23762 vtrn1q_s64 (int64x2_t __a, int64x2_t __b)
23764 #ifdef __AARCH64EB__
23765 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23766 #else
23767 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23768 #endif
23771 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
23772 vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b)
23774 #ifdef __AARCH64EB__
23775 return __builtin_shuffle (__a, __b,
23776 (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15});
23777 #else
23778 return __builtin_shuffle (__a, __b,
23779 (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30});
23780 #endif
23783 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
23784 vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b)
23786 #ifdef __AARCH64EB__
23787 return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7});
23788 #else
23789 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14});
23790 #endif
23793 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
23794 vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b)
23796 #ifdef __AARCH64EB__
23797 return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3});
23798 #else
23799 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6});
23800 #endif
23803 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
23804 vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b)
23806 #ifdef __AARCH64EB__
23807 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
23808 #else
23809 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
23810 #endif
/* vtrn2[q]: transpose-interleave the odd-numbered lanes of __a and
   __b (AArch64 TRN2): result lane 2*i is lane 2*i+1 of __a, result
   lane 2*i+1 is lane 2*i+1 of __b.  The __AARCH64EB__ masks are
   mirrored to compensate for GCC's reversed lane numbering on
   big-endian.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vtrn2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vtrn2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vtrn2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vtrn2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vtrn2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vtrn2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtrn2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtrn2_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtrn2_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vtrn2q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vtrn2q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vtrn2q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vtrn2q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vtrn2q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vtrn2q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
#endif
}
24029 __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
24030 vtrn_f32 (float32x2_t a, float32x2_t b)
24032 return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)};
24035 __extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
24036 vtrn_p8 (poly8x8_t a, poly8x8_t b)
24038 return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)};
24041 __extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
24042 vtrn_p16 (poly16x4_t a, poly16x4_t b)
24044 return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)};
24047 __extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
24048 vtrn_s8 (int8x8_t a, int8x8_t b)
24050 return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)};
24053 __extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
24054 vtrn_s16 (int16x4_t a, int16x4_t b)
24056 return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)};
24059 __extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
24060 vtrn_s32 (int32x2_t a, int32x2_t b)
24062 return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)};
24065 __extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
24066 vtrn_u8 (uint8x8_t a, uint8x8_t b)
24068 return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)};
24071 __extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
24072 vtrn_u16 (uint16x4_t a, uint16x4_t b)
24074 return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)};
24077 __extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
24078 vtrn_u32 (uint32x2_t a, uint32x2_t b)
24080 return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)};
24083 __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
24084 vtrnq_f32 (float32x4_t a, float32x4_t b)
24086 return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)};
24089 __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
24090 vtrnq_p8 (poly8x16_t a, poly8x16_t b)
24092 return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)};
24095 __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
24096 vtrnq_p16 (poly16x8_t a, poly16x8_t b)
24098 return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)};
24101 __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
24102 vtrnq_s8 (int8x16_t a, int8x16_t b)
24104 return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)};
24107 __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
24108 vtrnq_s16 (int16x8_t a, int16x8_t b)
24110 return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)};
24113 __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
24114 vtrnq_s32 (int32x4_t a, int32x4_t b)
24116 return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)};
24119 __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
24120 vtrnq_u8 (uint8x16_t a, uint8x16_t b)
24122 return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)};
24125 __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
24126 vtrnq_u16 (uint16x8_t a, uint16x8_t b)
24128 return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)};
24131 __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
24132 vtrnq_u32 (uint32x4_t a, uint32x4_t b)
24134 return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)};
/* vtst */

/* vtst[q]: per-lane bit test (AArch64 CMTST).  Each result lane is
   all ones when (__a & __b) is non-zero in that lane, else all
   zeros.  The single-lane DI variants and the scalar vtstd forms are
   computed in plain C rather than via a builtin.  */

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_s8 (int8x8_t __a, int8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi (__a, __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_s16 (int16x4_t __a, int16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi (__a, __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_s32 (int32x2_t __a, int32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si (__a, __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_s64 (int64x1_t __a, int64x1_t __b)
{
  /* Single lane: scalar AND-test, widened to an all-ones/zero mask.  */
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vtst_u8 (uint8x8_t __a, uint8x8_t __b)
{
  return (uint8x8_t) __builtin_aarch64_cmtstv8qi ((int8x8_t) __a,
						  (int8x8_t) __b);
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vtst_u16 (uint16x4_t __a, uint16x4_t __b)
{
  return (uint16x4_t) __builtin_aarch64_cmtstv4hi ((int16x4_t) __a,
						   (int16x4_t) __b);
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vtst_u32 (uint32x2_t __a, uint32x2_t __b)
{
  return (uint32x2_t) __builtin_aarch64_cmtstv2si ((int32x2_t) __a,
						   (int32x2_t) __b);
}

__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vtst_u64 (uint64x1_t __a, uint64x1_t __b)
{
  /* Single lane: scalar AND-test, widened to an all-ones/zero mask.  */
  return (uint64x1_t) {(__a[0] & __b[0]) ? -1ll : 0ll};
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_s8 (int8x16_t __a, int8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi (__a, __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_s16 (int16x8_t __a, int16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi (__a, __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_s32 (int32x4_t __a, int32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si (__a, __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_s64 (int64x2_t __a, int64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di (__a, __b);
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
{
  return (uint8x16_t) __builtin_aarch64_cmtstv16qi ((int8x16_t) __a,
						    (int8x16_t) __b);
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
{
  return (uint16x8_t) __builtin_aarch64_cmtstv8hi ((int16x8_t) __a,
						   (int16x8_t) __b);
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
{
  return (uint32x4_t) __builtin_aarch64_cmtstv4si ((int32x4_t) __a,
						   (int32x4_t) __b);
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vtstq_u64 (uint64x2_t __a, uint64x2_t __b)
{
  return (uint64x2_t) __builtin_aarch64_cmtstv2di ((int64x2_t) __a,
						   (int64x2_t) __b);
}

/* Scalar bit test: all-ones when __a & __b is non-zero, else zero.  */
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_s64 (int64_t __a, int64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vtstd_u64 (uint64_t __a, uint64_t __b)
{
  return (__a & __b) ? -1ll : 0ll;
}
/* vuqadd */

/* vuqadd[q]: signed saturating accumulate of an unsigned value
   (AArch64 SUQADD): per-lane __a + __b with the sum saturated to the
   signed range of the element type.  The _ssu builtin suffix encodes
   the signed-result / signed-acc / unsigned-addend signature.  */

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuqadd_s8 (int8x8_t __a, uint8x8_t __b)
{
  return __builtin_aarch64_suqaddv8qi_ssu (__a, __b);
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuqadd_s16 (int16x4_t __a, uint16x4_t __b)
{
  return __builtin_aarch64_suqaddv4hi_ssu (__a, __b);
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuqadd_s32 (int32x2_t __a, uint32x2_t __b)
{
  return __builtin_aarch64_suqaddv2si_ssu (__a, __b);
}

__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vuqadd_s64 (int64x1_t __a, uint64x1_t __b)
{
  /* Single lane: go through the scalar DI builtin.  */
  return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])};
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuqaddq_s8 (int8x16_t __a, uint8x16_t __b)
{
  return __builtin_aarch64_suqaddv16qi_ssu (__a, __b);
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuqaddq_s16 (int16x8_t __a, uint16x8_t __b)
{
  return __builtin_aarch64_suqaddv8hi_ssu (__a, __b);
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuqaddq_s32 (int32x4_t __a, uint32x4_t __b)
{
  return __builtin_aarch64_suqaddv4si_ssu (__a, __b);
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuqaddq_s64 (int64x2_t __a, uint64x2_t __b)
{
  return __builtin_aarch64_suqaddv2di_ssu (__a, __b);
}

/* Scalar forms (byte / halfword / word / doubleword).  */
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vuqaddb_s8 (int8x1_t __a, uint8x1_t __b)
{
  return __builtin_aarch64_suqaddqi_ssu (__a, __b);
}

__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vuqaddh_s16 (int16x1_t __a, uint16x1_t __b)
{
  return __builtin_aarch64_suqaddhi_ssu (__a, __b);
}

__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vuqadds_s32 (int32x1_t __a, uint32x1_t __b)
{
  return __builtin_aarch64_suqaddsi_ssu (__a, __b);
}

__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vuqaddd_s64 (int64_t __a, uint64_t __b)
{
  return __builtin_aarch64_suqadddi_ssu (__a, __b);
}
/* __DEFINTERLEAVE (op, rettype, intype, funcsuffix, Q): define the
   two-register interleave intrinsic v<op><Q>_<funcsuffix>, whose
   result pairs the corresponding v<op>1 and v<op>2 intrinsics.

   Fix: the generated functions previously named their parameters
   'a'/'b'.  A system header must keep such identifiers in the
   implementation-reserved namespace (C11 7.1.3); a user
   '#define a ...' before including <arm_neon.h> would otherwise
   corrupt every generated function.  Renamed to __a/__b, matching the
   rest of this file; callers are unaffected.  */
#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q)		\
  __extension__ static __inline rettype				\
  __attribute__ ((__always_inline__))					\
  v ## op ## Q ## _ ## funcsuffix (intype __a, intype __b)		\
  {									\
    return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (__a, __b),	\
		      v ## op ## 2 ## Q ## _ ## funcsuffix (__a, __b)};	\
  }

/* Instantiate __DEFINTERLEAVE over every D- and Q-register element
   type the interleave operations support.  */
#define __INTERLEAVE_LIST(op)					\
  __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,)	\
  __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,)		\
  __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,)	\
  __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,)		\
  __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,)		\
  __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,)		\
  __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,)		\
  __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,)	\
  __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,)	\
  __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q)	\
  __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q)	\
  __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q)	\
  __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q)		\
  __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q)	\
  __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q)	\
  __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q)	\
  __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q)	\
  __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q)
/* vuzp */

/* vuzp1[q]: unzip the even-numbered lanes of the concatenation
   {__a, __b} (AArch64 UZP1): the result is the even lanes of __a
   followed by the even lanes of __b.  The __AARCH64EB__ masks are
   mirrored to compensate for GCC's reversed lane numbering on
   big-endian.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp1_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp1_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp1_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp1_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp1_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp1_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp1_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vuzp1_u16 (uint16x4_t __a, uint16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vuzp1_u32 (uint32x2_t __a, uint32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
#endif
}

__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vuzp1q_f32 (float32x4_t __a, float32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vuzp1q_f64 (float64x2_t __a, float64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b, (uint8x16_t)
      {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vuzp1q_s8 (int8x16_t __a, int8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vuzp1q_s16 (int16x8_t __a, int16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vuzp1q_s32 (int32x4_t __a, int32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
vuzp1q_s64 (int64x2_t __a, int64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}

__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15});
#else
  return __builtin_shuffle (__a, __b,
      (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
#endif
}

__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7});
#else
  return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14});
#endif
}

__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3});
#else
  return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6});
#endif
}

__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
#else
  return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
#endif
}
/* vuzp2: unzip the odd-numbered lanes of the concatenation {__a, __b}
   (AArch64 UZP2): the result is the odd lanes of __a followed by the
   odd lanes of __b.  The __AARCH64EB__ masks are mirrored to
   compensate for GCC's reversed lane numbering on big-endian.  */

__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vuzp2_f32 (float32x2_t __a, float32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vuzp2_p8 (poly8x8_t __a, poly8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
vuzp2_p16 (poly16x4_t __a, poly16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vuzp2_s8 (int8x8_t __a, int8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}

__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vuzp2_s16 (int16x4_t __a, int16x4_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
#else
  return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
#endif
}

__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vuzp2_s32 (int32x2_t __a, int32x2_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
#else
  return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
#endif
}

__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vuzp2_u8 (uint8x8_t __a, uint8x8_t __b)
{
#ifdef __AARCH64EB__
  return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
#else
  return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
#endif
}
24645 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24646 vuzp2_u16 (uint16x4_t __a, uint16x4_t __b)
24648 #ifdef __AARCH64EB__
24649 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2});
24650 #else
24651 return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7});
24652 #endif
24655 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24656 vuzp2_u32 (uint32x2_t __a, uint32x2_t __b)
24658 #ifdef __AARCH64EB__
24659 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
24660 #else
24661 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
24662 #endif
24665 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24666 vuzp2q_f32 (float32x4_t __a, float32x4_t __b)
24668 #ifdef __AARCH64EB__
24669 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24670 #else
24671 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24672 #endif
24675 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24676 vuzp2q_f64 (float64x2_t __a, float64x2_t __b)
24678 #ifdef __AARCH64EB__
24679 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24680 #else
24681 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24682 #endif
24685 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24686 vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b)
24688 #ifdef __AARCH64EB__
24689 return __builtin_shuffle (__a, __b,
24690 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24691 #else
24692 return __builtin_shuffle (__a, __b,
24693 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24694 #endif
24697 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24698 vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b)
24700 #ifdef __AARCH64EB__
24701 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24702 #else
24703 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24704 #endif
24707 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24708 vuzp2q_s8 (int8x16_t __a, int8x16_t __b)
24710 #ifdef __AARCH64EB__
24711 return __builtin_shuffle (__a, __b,
24712 (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24713 #else
24714 return __builtin_shuffle (__a, __b,
24715 (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24716 #endif
24719 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24720 vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
24722 #ifdef __AARCH64EB__
24723 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24724 #else
24725 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24726 #endif
24729 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24730 vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
24732 #ifdef __AARCH64EB__
24733 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24734 #else
24735 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24736 #endif
24739 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24740 vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
24742 #ifdef __AARCH64EB__
24743 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24744 #else
24745 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24746 #endif
24749 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24750 vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
24752 #ifdef __AARCH64EB__
24753 return __builtin_shuffle (__a, __b, (uint8x16_t)
24754 {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
24755 #else
24756 return __builtin_shuffle (__a, __b, (uint8x16_t)
24757 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
24758 #endif
24761 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24762 vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b)
24764 #ifdef __AARCH64EB__
24765 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
24766 #else
24767 return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
24768 #endif
24771 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24772 vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b)
24774 #ifdef __AARCH64EB__
24775 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
24776 #else
24777 return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
24778 #endif
24781 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
24782 vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b)
24784 #ifdef __AARCH64EB__
24785 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
24786 #else
24787 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
24788 #endif
24791 __INTERLEAVE_LIST (uzp)
24793 /* vzip */
24795 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
24796 vzip1_f32 (float32x2_t __a, float32x2_t __b)
24798 #ifdef __AARCH64EB__
24799 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24800 #else
24801 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24802 #endif
24805 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
24806 vzip1_p8 (poly8x8_t __a, poly8x8_t __b)
24808 #ifdef __AARCH64EB__
24809 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24810 #else
24811 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24812 #endif
24815 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
24816 vzip1_p16 (poly16x4_t __a, poly16x4_t __b)
24818 #ifdef __AARCH64EB__
24819 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24820 #else
24821 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24822 #endif
24825 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
24826 vzip1_s8 (int8x8_t __a, int8x8_t __b)
24828 #ifdef __AARCH64EB__
24829 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24830 #else
24831 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24832 #endif
24835 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
24836 vzip1_s16 (int16x4_t __a, int16x4_t __b)
24838 #ifdef __AARCH64EB__
24839 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24840 #else
24841 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24842 #endif
24845 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
24846 vzip1_s32 (int32x2_t __a, int32x2_t __b)
24848 #ifdef __AARCH64EB__
24849 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24850 #else
24851 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24852 #endif
24855 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
24856 vzip1_u8 (uint8x8_t __a, uint8x8_t __b)
24858 #ifdef __AARCH64EB__
24859 return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7});
24860 #else
24861 return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24862 #endif
24865 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
24866 vzip1_u16 (uint16x4_t __a, uint16x4_t __b)
24868 #ifdef __AARCH64EB__
24869 return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3});
24870 #else
24871 return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5});
24872 #endif
24875 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
24876 vzip1_u32 (uint32x2_t __a, uint32x2_t __b)
24878 #ifdef __AARCH64EB__
24879 return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1});
24880 #else
24881 return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2});
24882 #endif
24885 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
24886 vzip1q_f32 (float32x4_t __a, float32x4_t __b)
24888 #ifdef __AARCH64EB__
24889 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24890 #else
24891 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
24892 #endif
24895 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
24896 vzip1q_f64 (float64x2_t __a, float64x2_t __b)
24898 #ifdef __AARCH64EB__
24899 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24900 #else
24901 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24902 #endif
24905 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
24906 vzip1q_p8 (poly8x16_t __a, poly8x16_t __b)
24908 #ifdef __AARCH64EB__
24909 return __builtin_shuffle (__a, __b, (uint8x16_t)
24910 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24911 #else
24912 return __builtin_shuffle (__a, __b, (uint8x16_t)
24913 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24914 #endif
24917 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
24918 vzip1q_p16 (poly16x8_t __a, poly16x8_t __b)
24920 #ifdef __AARCH64EB__
24921 return __builtin_shuffle (__a, __b, (uint16x8_t)
24922 {12, 4, 13, 5, 14, 6, 15, 7});
24923 #else
24924 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24925 #endif
24928 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
24929 vzip1q_s8 (int8x16_t __a, int8x16_t __b)
24931 #ifdef __AARCH64EB__
24932 return __builtin_shuffle (__a, __b, (uint8x16_t)
24933 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24934 #else
24935 return __builtin_shuffle (__a, __b, (uint8x16_t)
24936 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24937 #endif
24940 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
24941 vzip1q_s16 (int16x8_t __a, int16x8_t __b)
24943 #ifdef __AARCH64EB__
24944 return __builtin_shuffle (__a, __b, (uint16x8_t)
24945 {12, 4, 13, 5, 14, 6, 15, 7});
24946 #else
24947 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24948 #endif
24951 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
24952 vzip1q_s32 (int32x4_t __a, int32x4_t __b)
24954 #ifdef __AARCH64EB__
24955 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24956 #else
24957 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
24958 #endif
24961 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
24962 vzip1q_s64 (int64x2_t __a, int64x2_t __b)
24964 #ifdef __AARCH64EB__
24965 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
24966 #else
24967 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
24968 #endif
24971 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
24972 vzip1q_u8 (uint8x16_t __a, uint8x16_t __b)
24974 #ifdef __AARCH64EB__
24975 return __builtin_shuffle (__a, __b, (uint8x16_t)
24976 {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15});
24977 #else
24978 return __builtin_shuffle (__a, __b, (uint8x16_t)
24979 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23});
24980 #endif
24983 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
24984 vzip1q_u16 (uint16x8_t __a, uint16x8_t __b)
24986 #ifdef __AARCH64EB__
24987 return __builtin_shuffle (__a, __b, (uint16x8_t)
24988 {12, 4, 13, 5, 14, 6, 15, 7});
24989 #else
24990 return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
24991 #endif
24994 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
24995 vzip1q_u32 (uint32x4_t __a, uint32x4_t __b)
24997 #ifdef __AARCH64EB__
24998 return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3});
24999 #else
25000 return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5});
25001 #endif
25004 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25005 vzip1q_u64 (uint64x2_t __a, uint64x2_t __b)
25007 #ifdef __AARCH64EB__
25008 return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1});
25009 #else
25010 return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2});
25011 #endif
25014 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
25015 vzip2_f32 (float32x2_t __a, float32x2_t __b)
25017 #ifdef __AARCH64EB__
25018 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25019 #else
25020 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25021 #endif
25024 __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
25025 vzip2_p8 (poly8x8_t __a, poly8x8_t __b)
25027 #ifdef __AARCH64EB__
25028 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25029 #else
25030 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25031 #endif
25034 __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
25035 vzip2_p16 (poly16x4_t __a, poly16x4_t __b)
25037 #ifdef __AARCH64EB__
25038 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25039 #else
25040 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25041 #endif
25044 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
25045 vzip2_s8 (int8x8_t __a, int8x8_t __b)
25047 #ifdef __AARCH64EB__
25048 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25049 #else
25050 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25051 #endif
25054 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
25055 vzip2_s16 (int16x4_t __a, int16x4_t __b)
25057 #ifdef __AARCH64EB__
25058 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25059 #else
25060 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25061 #endif
25064 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
25065 vzip2_s32 (int32x2_t __a, int32x2_t __b)
25067 #ifdef __AARCH64EB__
25068 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25069 #else
25070 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25071 #endif
25074 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
25075 vzip2_u8 (uint8x8_t __a, uint8x8_t __b)
25077 #ifdef __AARCH64EB__
25078 return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25079 #else
25080 return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15});
25081 #endif
25084 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
25085 vzip2_u16 (uint16x4_t __a, uint16x4_t __b)
25087 #ifdef __AARCH64EB__
25088 return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1});
25089 #else
25090 return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7});
25091 #endif
25094 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
25095 vzip2_u32 (uint32x2_t __a, uint32x2_t __b)
25097 #ifdef __AARCH64EB__
25098 return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0});
25099 #else
25100 return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3});
25101 #endif
25104 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
25105 vzip2q_f32 (float32x4_t __a, float32x4_t __b)
25107 #ifdef __AARCH64EB__
25108 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25109 #else
25110 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25111 #endif
25114 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
25115 vzip2q_f64 (float64x2_t __a, float64x2_t __b)
25117 #ifdef __AARCH64EB__
25118 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25119 #else
25120 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25121 #endif
25124 __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
25125 vzip2q_p8 (poly8x16_t __a, poly8x16_t __b)
25127 #ifdef __AARCH64EB__
25128 return __builtin_shuffle (__a, __b, (uint8x16_t)
25129 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25130 #else
25131 return __builtin_shuffle (__a, __b, (uint8x16_t)
25132 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25133 #endif
25136 __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
25137 vzip2q_p16 (poly16x8_t __a, poly16x8_t __b)
25139 #ifdef __AARCH64EB__
25140 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25141 #else
25142 return __builtin_shuffle (__a, __b, (uint16x8_t)
25143 {4, 12, 5, 13, 6, 14, 7, 15});
25144 #endif
25147 __extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
25148 vzip2q_s8 (int8x16_t __a, int8x16_t __b)
25150 #ifdef __AARCH64EB__
25151 return __builtin_shuffle (__a, __b, (uint8x16_t)
25152 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25153 #else
25154 return __builtin_shuffle (__a, __b, (uint8x16_t)
25155 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25156 #endif
25159 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
25160 vzip2q_s16 (int16x8_t __a, int16x8_t __b)
25162 #ifdef __AARCH64EB__
25163 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25164 #else
25165 return __builtin_shuffle (__a, __b, (uint16x8_t)
25166 {4, 12, 5, 13, 6, 14, 7, 15});
25167 #endif
25170 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
25171 vzip2q_s32 (int32x4_t __a, int32x4_t __b)
25173 #ifdef __AARCH64EB__
25174 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25175 #else
25176 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25177 #endif
25180 __extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
25181 vzip2q_s64 (int64x2_t __a, int64x2_t __b)
25183 #ifdef __AARCH64EB__
25184 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25185 #else
25186 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25187 #endif
25190 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
25191 vzip2q_u8 (uint8x16_t __a, uint8x16_t __b)
25193 #ifdef __AARCH64EB__
25194 return __builtin_shuffle (__a, __b, (uint8x16_t)
25195 {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7});
25196 #else
25197 return __builtin_shuffle (__a, __b, (uint8x16_t)
25198 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31});
25199 #endif
25202 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
25203 vzip2q_u16 (uint16x8_t __a, uint16x8_t __b)
25205 #ifdef __AARCH64EB__
25206 return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3});
25207 #else
25208 return __builtin_shuffle (__a, __b, (uint16x8_t)
25209 {4, 12, 5, 13, 6, 14, 7, 15});
25210 #endif
25213 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
25214 vzip2q_u32 (uint32x4_t __a, uint32x4_t __b)
25216 #ifdef __AARCH64EB__
25217 return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1});
25218 #else
25219 return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7});
25220 #endif
25223 __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
25224 vzip2q_u64 (uint64x2_t __a, uint64x2_t __b)
25226 #ifdef __AARCH64EB__
25227 return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
25228 #else
25229 return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
25230 #endif
25233 __INTERLEAVE_LIST (zip)
25235 #undef __INTERLEAVE_LIST
25236 #undef __DEFINTERLEAVE
25238 /* End of optimal implementations in approved order. */
25240 #undef __aarch64_vget_lane_any
25241 #undef __aarch64_vget_lane_f32
25242 #undef __aarch64_vget_lane_f64
25243 #undef __aarch64_vget_lane_p8
25244 #undef __aarch64_vget_lane_p16
25245 #undef __aarch64_vget_lane_s8
25246 #undef __aarch64_vget_lane_s16
25247 #undef __aarch64_vget_lane_s32
25248 #undef __aarch64_vget_lane_s64
25249 #undef __aarch64_vget_lane_u8
25250 #undef __aarch64_vget_lane_u16
25251 #undef __aarch64_vget_lane_u32
25252 #undef __aarch64_vget_lane_u64
25254 #undef __aarch64_vgetq_lane_f32
25255 #undef __aarch64_vgetq_lane_f64
25256 #undef __aarch64_vgetq_lane_p8
25257 #undef __aarch64_vgetq_lane_p16
25258 #undef __aarch64_vgetq_lane_s8
25259 #undef __aarch64_vgetq_lane_s16
25260 #undef __aarch64_vgetq_lane_s32
25261 #undef __aarch64_vgetq_lane_s64
25262 #undef __aarch64_vgetq_lane_u8
25263 #undef __aarch64_vgetq_lane_u16
25264 #undef __aarch64_vgetq_lane_u32
25265 #undef __aarch64_vgetq_lane_u64
25267 #undef __aarch64_vdup_lane_any
25268 #undef __aarch64_vdup_lane_f32
25269 #undef __aarch64_vdup_lane_f64
25270 #undef __aarch64_vdup_lane_p8
25271 #undef __aarch64_vdup_lane_p16
25272 #undef __aarch64_vdup_lane_s8
25273 #undef __aarch64_vdup_lane_s16
25274 #undef __aarch64_vdup_lane_s32
25275 #undef __aarch64_vdup_lane_s64
25276 #undef __aarch64_vdup_lane_u8
25277 #undef __aarch64_vdup_lane_u16
25278 #undef __aarch64_vdup_lane_u32
25279 #undef __aarch64_vdup_lane_u64
25280 #undef __aarch64_vdup_laneq_f32
25281 #undef __aarch64_vdup_laneq_f64
25282 #undef __aarch64_vdup_laneq_p8
25283 #undef __aarch64_vdup_laneq_p16
25284 #undef __aarch64_vdup_laneq_s8
25285 #undef __aarch64_vdup_laneq_s16
25286 #undef __aarch64_vdup_laneq_s32
25287 #undef __aarch64_vdup_laneq_s64
25288 #undef __aarch64_vdup_laneq_u8
25289 #undef __aarch64_vdup_laneq_u16
25290 #undef __aarch64_vdup_laneq_u32
25291 #undef __aarch64_vdup_laneq_u64
25292 #undef __aarch64_vdupq_lane_f32
25293 #undef __aarch64_vdupq_lane_f64
25294 #undef __aarch64_vdupq_lane_p8
25295 #undef __aarch64_vdupq_lane_p16
25296 #undef __aarch64_vdupq_lane_s8
25297 #undef __aarch64_vdupq_lane_s16
25298 #undef __aarch64_vdupq_lane_s32
25299 #undef __aarch64_vdupq_lane_s64
25300 #undef __aarch64_vdupq_lane_u8
25301 #undef __aarch64_vdupq_lane_u16
25302 #undef __aarch64_vdupq_lane_u32
25303 #undef __aarch64_vdupq_lane_u64
25304 #undef __aarch64_vdupq_laneq_f32
25305 #undef __aarch64_vdupq_laneq_f64
25306 #undef __aarch64_vdupq_laneq_p8
25307 #undef __aarch64_vdupq_laneq_p16
25308 #undef __aarch64_vdupq_laneq_s8
25309 #undef __aarch64_vdupq_laneq_s16
25310 #undef __aarch64_vdupq_laneq_s32
25311 #undef __aarch64_vdupq_laneq_s64
25312 #undef __aarch64_vdupq_laneq_u8
25313 #undef __aarch64_vdupq_laneq_u16
25314 #undef __aarch64_vdupq_laneq_u32
25315 #undef __aarch64_vdupq_laneq_u64
25317 #endif